| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[getFpTransforms.R] by DSB Fre 15/06/2012 16:05 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Transform a variable according to FP transformation formula and attach proper |
|
| 9 |
## names to the resulting design matrix. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 06/01/2010 copy from package bfp, slightly pimp and add Roxygen stuff |
|
| 13 |
## 15/06/2012 simplify %bt% documentation for new roxygen package |
|
| 14 |
##################################################################################### |
|
| 15 | ||
| 16 | ||
| 17 |
##' Box Tidwell transformation |
|
| 18 |
##' |
|
| 19 |
##' Simple function to apply the Box Tidwell transformation to a vector, when a (single) power is |
|
| 20 |
##' provided. |
|
| 21 |
##' |
|
| 22 |
##' @param x the numeric vector |
|
| 23 |
##' @param pow the single power |
|
| 24 |
##' @return the transformed vector |
|
| 25 |
##' |
|
| 26 |
##' @name boxTidwell |
|
| 27 |
##' @aliases boxTidwell '%bt%' |
|
| 28 |
##' @keywords utilities internal |
|
| 29 |
"%bt%" <- function(x, |
|
| 30 |
pow) {
|
|
| 31 |
## checks |
|
| 32 | 18x |
stopifnot( |
| 33 | 18x |
identical(length(pow), 1L), |
| 34 | 18x |
is.numeric(x), |
| 35 | 18x |
is.numeric(pow) |
| 36 |
) |
|
| 37 | ||
| 38 |
## transformation |
|
| 39 | 18x |
if (pow != 0) {
|
| 40 | 16x |
x^pow |
| 41 |
} else { ## pow == 0
|
|
| 42 | 2x |
log(x) |
| 43 |
} |
|
| 44 |
} |
|
| 45 | ||
| 46 | ||
| 47 |
##' Get the FP transforms matrix of a given covariate vector |
|
| 48 |
##' |
|
| 49 |
##' Get the (centered) FP transforms matrix of a given covariate vector, when the corresponding |
|
| 50 |
##' power vector (with at least one element) is provided. |
|
| 51 |
##' |
|
| 52 |
##' @param vec positive (== already shifted and scaled) column vector with proper colname |
|
| 53 |
##' @param powers power vector with at least one element |
|
| 54 |
##' @param center center the columns of the FP transforms matrix around zero? (default) |
|
| 55 |
##' @return the FP transforms matrix with proper colnames. |
|
| 56 |
##' |
|
| 57 |
##' @keywords utilities internal |
|
| 58 |
##' Get the FP transforms matrix of a given covariate vector
##'
##' Get the (centered) FP transforms matrix of a given covariate vector, when the corresponding
##' power vector (with at least one element) is provided.
##'
##' @param vec positive (== already shifted and scaled) column vector with proper colname
##' @param powers power vector with at least one element
##' @param center center the columns of the FP transforms matrix around zero? (default)
##' @return the FP transforms matrix with proper colnames.
##'
##' @keywords utilities internal
getFpTransforms <- function(vec,
                            powers,
                            center = TRUE) {
  ## internal helper, only needed here: readable column name for a
  ## single-power transform ("name^pow", or "log(name)" for power zero)
  getTransformName <- function(name, pow) {
    stopifnot(
      identical(length(pow), 1L),
      is.character(name)
    )
    if (pow != 0) {
      paste(name, pow, sep = "^")
    } else {
      paste0("log(", name, ")")
    }
  }

  ## properties of the arguments
  covName <- colnames(vec)
  vec <- as.vector(vec)
  nObs <- length(vec)
  nPowers <- length(powers)

  ## allocate the return matrix and its column names
  ret <- matrix(nrow = nObs, ncol = nPowers)
  retColnames <- character(nPowers)

  ## recursion state: start from a column of ones with the "log" power (0),
  ## and track how often the current power has been repeated so far
  prevCol <- 1
  prevPow <- 0
  nRepeats <- 0L

  for (i in seq_along(powers)) {
    ## note: `pw` intentionally avoids shadowing the base constant `pi`
    pw <- powers[i]

    if (i != 1L && pw == prevPow) {
      ## repeated-power case: multiply the previous column by log(vec)
      nRepeats <- nRepeats + 1L
      prevCol <- prevCol * log(vec)

      ## the column name is a bit more involved here
      if (pw == 0) {
        ## the log itself was repeated
        retColnames[i] <- paste(getTransformName(covName, 0),
                                nRepeats + 1L,
                                sep = "^")
      } else {
        ## some other power was repeated
        label <- paste(getTransformName(covName, pw),
                       getTransformName(covName, 0),
                       sep = "*")
        if (nRepeats > 1L) {
          label <- paste(label, nRepeats, sep = "^")
        }
        retColnames[i] <- label
      }
    } else {
      ## fresh power: the column is the Box-Tidwell transform ...
      prevCol <- vec %bt% pw

      ## ... and the name is straightforward
      retColnames[i] <- getTransformName(covName, pw)

      ## reset the repetition state
      nRepeats <- 0L
      prevPow <- pw
    }

    ## store the just-computed column
    ret[, i] <- prevCol
  }

  ## optionally center the transforms matrix columnwise
  if (isTRUE(center)) {
    ret <- scale(ret, center = TRUE, scale = FALSE)
  }

  ## attach proper colnames and return
  colnames(ret) <- retColnames
  return(ret)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[hpds.R] by DSB Die 25/05/2010 21:32 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## HPD functions |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 07/01/2010 copy + modify from package bfp |
|
| 12 |
## 25/05/2010 change layout of samples argument in scrHpd |
|
| 13 |
##################################################################################### |
|
| 14 | ||
| 15 |
##' Construct an empirical HPD interval from samples |
|
| 16 |
##' |
|
| 17 |
##' Construct an empirical highest posterior density (HPD) interval from |
|
| 18 |
##' samples which have been drawn from the distribution of a quantity of |
|
| 19 |
##' interest. |
|
| 20 |
##' |
|
| 21 |
##' @param theta the vector of samples |
|
| 22 |
##' @param level the credible level |
|
| 23 |
##' @return A vector with the estimated lower and upper bounds of the HPD |
|
| 24 |
##' interval. |
|
| 25 |
##' |
|
| 26 |
##' @seealso \code{\link{scrHpd}}
|
|
| 27 |
##' @keywords htest |
|
| 28 |
##' @export |
|
| 29 |
##' Construct an empirical HPD interval from samples
##'
##' Construct an empirical highest posterior density (HPD) interval from
##' samples which have been drawn from the distribution of a quantity of
##' interest: among all candidate intervals covering the requested credible
##' level, the shortest one is returned.
##'
##' @param theta the vector of samples
##' @param level the credible level
##' @return A vector with the estimated lower and upper bounds of the HPD
##' interval.
##'
##' @seealso \code{\link{scrHpd}}
##' @keywords htest
##' @export
empiricalHpd <- function(theta,
                         level) {
  ## theta must be numeric and level must lie strictly inside (0, 1)
  stopifnot(
    is.numeric(theta),
    0 < level && 1 > level
  )

  ## number of samples and their sorted order
  n <- length(theta)
  sortedTheta <- sort.int(theta, method = "quick")

  ## how many candidate intervals at credible level "level" must be compared?
  numCandidates <- ceiling(n * (1 - level))

  ## lower and upper index vectors of the candidate intervals
  lowerIdx <- seq_len(numCandidates)
  upperIdx <- n - numCandidates + lowerIdx

  ## pick the candidate with the smallest width
  widths <- sortedTheta[upperIdx] - sortedTheta[lowerIdx]
  best <- which.min(widths)

  ## bounds of the shortest interval
  c(
    lower = sortedTheta[lowerIdx[best]],
    upper = sortedTheta[upperIdx[best]]
  )
}
|
| 62 | ||
| 63 | ||
| 64 |
##' Calculate an SCB from a samples matrix |
|
| 65 |
##' |
|
| 66 |
##' Calculate an SCB from a samples matrix, which minimizes |
|
| 67 |
##' the absolute distances of the contained samples to a mode vector, at |
|
| 68 |
##' each gridpoint. Therefore the SCB might be considered an \dQuote{HPD
|
|
| 69 |
##' SCB}. |
|
| 70 |
##' |
|
| 71 |
##' @param samples m by n matrix where m is the number of parameters, |
|
| 72 |
##' n is the number of samples and hence each (multivariate) sample is a column in |
|
| 73 |
##' the matrix \code{samples}
|
|
| 74 |
##' @param mode mode vector of length m (defaults to the vector of medians) |
|
| 75 |
##' @param level credible level for the SCB (default: 0.95) |
|
| 76 |
##' @return A matrix with columns \dQuote{lower} and \dQuote{upper}, with the
|
|
| 77 |
##' lower and upper SCB bounds, respectively. |
|
| 78 |
##' |
|
| 79 |
##' @references Besag, J.; Green, P.; Higdon, D. & Mengersen, K. (1995): |
|
| 80 |
##' \dQuote{Bayesian computation and stochastic systems (with
|
|
| 81 |
##' discussion)}, \emph{Statistical Science}, 10, 3-66.
|
|
| 82 |
##' @seealso \code{\link{empiricalHpd}}
|
|
| 83 |
##' @keywords htest multivariate |
|
| 84 |
##' @export |
|
| 85 |
##' Calculate an SCB from a samples matrix
##'
##' Calculate an SCB from a samples matrix, which minimizes
##' the absolute distances of the contained samples to a mode vector, at
##' each gridpoint. Therefore the SCB might be considered an \dQuote{HPD
##' SCB}.
##'
##' @param samples m by n matrix where m is the number of parameters,
##' n is the number of samples and hence each (multivariate) sample is a column in
##' the matrix \code{samples}
##' @param mode mode vector of length m (defaults to the vector of medians)
##' @param level credible level for the SCB (default: 0.95)
##' @return A matrix with columns \dQuote{lower} and \dQuote{upper}, with the
##' lower and upper SCB bounds, respectively.
##'
##' @references Besag, J.; Green, P.; Higdon, D. & Mengersen, K. (1995):
##' \dQuote{Bayesian computation and stochastic systems (with
##' discussion)}, \emph{Statistical Science}, 10, 3-66.
##' @seealso \code{\link{empiricalHpd}}
##' @keywords htest multivariate
##' @export
scrHpd <- function(samples,
                   mode = apply(samples, 1, median),
                   level = 0.95) {
  ## extracts
  nPars <- nrow(samples) # the number of parameters
  nSamples <- ncol(samples) # the number of samples

  ## checks
  if (nPars != length(mode)) {
    stop("mode vector must have same length as samples matrix!")
  }
  stopifnot(level > 0 && level < 1)

  ## absolute distance from mode vector
  distance <- abs(sweep(samples, 1, mode))

  ## Calculate a simultaneous (k/ nSamples)*100% credible band
  ## using the ranks approach:
  k <- floor(level * nSamples)

  ## rowwise (= parameterwise) ranks of distances
  ## (note: apply() transposes, so this is an nSamples x nPars matrix)
  rankdistance <- apply(distance, 1, rank)

  ## maximum ranks in each multivariate sample
  tstari <- apply(rankdistance, 1, max)

  ## sort the maximum ranks
  ordtstari <- sort.int(tstari, method = "quick")

  ## the required rank, which divides the samples in contained and rejected samples for the SCB,
  ## is:
  tstar <- ordtstari[k]

  ## now which vectors are inside the SCB?
  whichInside <- tstari <= tstar
  ## note that sum(whichInside) is possibly larger than k, so we have
  ## a larger empirical coverage of the resulting SCB.

  ## reduce the samples matrix accordingly.
  ## drop = FALSE keeps the matrix structure even when only a single
  ## sample remains inside; without it the subsequent apply() would fail.
  samples <- samples[, whichInside, drop = FALSE]

  ## the parameterwise ranges of these vectors form the SCB
  ## (just the convex hull of all sample vectors!)
  ret <- t(apply(samples, 1, range))
  colnames(ret) <- c("lower", "upper")

  ## finally return the (nPars x 2) matrix
  return(ret)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[glmBayesMfp.R] by DSB Mit 03/07/2013 22:57 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Main user interface for Bayesian inference for fractional polynomials in generalized linear |
|
| 9 |
## models and Cox models. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 26/10/2009 file creation: copy and modify the old BayesMfp.R |
|
| 13 |
## 27/10/2009 first only allow the binomial family, and write the code in an |
|
| 14 |
## extensible way, so that it later can be extended easily when binomial works |
|
| 15 |
## (11/12/2009 note: actually we now have generality for all GLM families.) |
|
| 16 |
## 06/11/2009 include group sizes |
|
| 17 |
## 09/11/2009 add Gauss Hermite quantiles and weights |
|
| 18 |
## 18/11/2009 progress towards first testable version of the marginal likelihood |
|
| 19 |
## approximation, |
|
| 20 |
## use roxygen after cleaning the package from old hyper-g stuff. |
|
| 21 |
## 01/12/2009 do not coerce model response to type double automatically, |
|
| 22 |
## because it could be a factor in the binomial case. |
|
| 23 |
## 11/12/2009 rewrite passing of info to C++ and storage of attributes info, |
|
| 24 |
## to make it clearer and facilitate reuse in other functions, e.g. sampleGlm. |
|
| 25 |
## 06/01/2010 extend getFamily to return self-crafted "simulate" function in the |
|
| 26 |
## return list. This is used e.g. by "sampleGlm". |
|
| 27 |
## 12/02/2010 for clarity, split off helper functions into their separate files, |
|
| 28 |
## so we need to include them in the preamble |
|
| 29 |
## 17/02/2010 split off null model information computation, and include the returned |
|
| 30 |
## list in the things passed to C++ (instead of only the log marginal |
|
| 31 |
## likelihood for the null model) |
|
| 32 |
## 15/03/2010 also pass weights to C++ via the family list, |
|
| 33 |
## new g-prior class is expected in the prior list |
|
| 34 |
## 12/04/2010 do not coerce totalNumber to integer but to double, as it is done in C++ |
|
| 35 |
## in the same manner and keeps the number space large enough. |
|
| 36 |
## 14/04/2010 add "useBfgs" and "largeVariance" options. |
|
| 37 |
## 17/05/2010 useBfgs=FALSE ("optimize") is now the default because it is more
|
|
| 38 |
## robust than Bfgs. Add "useOpenMP" option, which makes it easier |
|
| 39 |
## to switch than setting the environment variable OMP_NUM_THREADS. |
|
| 40 |
## 21/05/2010 be more careful when passing "fpnames" to C++ |
|
| 41 |
## 25/05/2010 add "nObs" to "data" attribute of return list. |
|
| 42 |
## 08/07/2010 add an empirical Bayes option which ranks the models in terms of |
|
| 43 |
## approximate *conditional* marginal likelihoods |
|
| 44 |
## 29/07/2010 add the new option to get a better Laplace approximation in the |
|
| 45 |
## case of binary logistic regression. |
|
| 46 |
## 08/07/2011 add the new modelPrior option "dependent" |
|
| 47 |
## 29/07/2011 now "higherOrderCorrection" |
|
| 48 |
## 15/02/2012 check that not two terms contain the same covariate in |
|
| 49 |
## the formula |
|
| 50 |
## 21/11/2012 add "tbf" option |
|
| 51 |
## 03/12/2012 add Cox regression |
|
| 52 |
## 10/12/2012 fix bug: order according to survival times when Cox model is |
|
| 53 |
## requested, otherwise the computed deviances are wrong!! |
|
| 54 |
## 06/02/2013 remove default for "family" |
|
| 55 |
## 03/07/2013 - add offsets |
|
| 56 |
## - remove getNullModelInfo |
|
| 57 |
## 26/05/2014 Added option (useFixedc) to calculate (or not) c factor using |
|
| 58 |
## mean of observations as in null model instead of alpha=0. |
|
| 59 |
##################################################################################### |
|
| 60 | ||
| 61 |
##' @include helpers.R |
|
| 62 |
##' @include formula.R |
|
| 63 |
##' @include fpScale.R |
|
| 64 |
##' @include GPrior-classes.R |
|
| 65 |
##' @include getFamily.R |
|
| 66 |
NULL |
|
| 67 | ||
| 68 | ||
| 69 |
##' Bayesian model inference for fractional polynomial GLMs and Cox models |
|
| 70 |
##' |
|
| 71 |
##' Bayesian model inference for fractional polynomial models from the generalized linear model |
|
| 72 |
##' family or the Cox model is conducted by means of either exhaustive model space evaluation or |
|
| 73 |
##' posterior model sampling. The approach is based on analytical marginal likelihood |
|
| 74 |
##' approximations, using integrated Laplace approximation. Alternatively, test-based Bayes factors |
|
| 75 |
##' (TBFs) are used. |
|
| 76 |
##' |
|
| 77 |
##' The formula is of the form \code{y ~ bfp (x1, max = 4) + uc (x2 + x3)}, that is, the auxiliary
|
|
| 78 |
##' functions \code{\link{bfp}} and \code{\link{uc}} must be used for defining the fractional
|
|
| 79 |
##' polynomial and uncertain fixed form covariates terms, respectively. There must be an intercept, |
|
| 80 |
##' and no other fixed covariates are allowed. All \code{max} arguments of the \code{\link{bfp}}
|
|
| 81 |
##' terms must be identical. \code{y} is the response vector for GLMs or the vector of survival
|
|
| 82 |
##' times for Cox regression. Note that Cox regression is only implemented with TBFs. |
|
| 83 |
##' |
|
| 84 |
##' The prior specifications are a list: |
|
| 85 |
##' \describe{
|
|
| 86 |
##' \item{gPrior}{A g-prior class object. Defaults to a hyper-g prior. See
|
|
| 87 |
##' \code{\linkS4class{GPrior}} for more information.}
|
|
| 88 |
##' \item{modelPrior}{choose if a flat model prior (\code{"flat"}), a
|
|
| 89 |
##' model prior favoring sparse models explicitly (default, \code{"sparse"}),
|
|
| 90 |
##' or a dependent model prior (\code{"dependent"}) should be used.}
|
|
| 91 |
##' } |
|
| 92 |
##' |
|
| 93 |
##' If \code{method = "ask"}, the user is prompted with the maximum cardinality of the model space
|
|
| 94 |
##' and can then decide whether to use posterior sampling or the exhaustive model space evaluation. |
|
| 95 |
##' |
|
| 96 |
##' Note that if you specify only one FP term, the exhaustive model search must be done, due to the |
|
| 97 |
##' structure of the model sampling algorithm. However, in reality this will not be a problem as the |
|
| 98 |
##' model space will typically be very small. |
|
| 99 |
##' @param formula model formula |
|
| 100 |
##' @param censInd censoring indicator. Default is \code{NULL}, but if a non-\code{NULL} vector is
|
|
| 101 |
##' supplied, this is assumed to be logical (\code{TRUE} = observed, \code{FALSE} = censored) and
|
|
| 102 |
##' Cox regression is performed. |
|
| 103 |
##' @param data optional data.frame for model variables (defaults to the parent frame) |
|
| 104 |
##' @param weights optionally a vector of positive weights (if not provided, a vector of one's) |
|
| 105 |
##' @param offset this can be used to specify an _a priori_ known component to be included in the |
|
| 106 |
##' linear predictor during fitting. This must be a numeric vector of length equal to the number |
|
| 107 |
##' of cases (if not provided, a vector of zeroes) |
|
| 108 |
##' @param family distribution and link (as in the glm function). Needs to be explicitly specified |
|
| 109 |
##' for all models except the Cox model. |
|
| 110 |
##' @param phi value of the dispersion parameter (defaults to 1) |
|
| 111 |
##' @param tbf Use TBF methodology to compute the marginal likelihood? (not default) Must be |
|
| 112 |
##' \code{TRUE} if Cox regression is done.
|
|
| 113 |
##' @param empiricalBayes rank the models in terms of \emph{conditional} marginal likelihood, using
|
|
| 114 |
##' an empirical Bayes estimate of g? (not default) Due to coding structure, the prior on g must |
|
| 115 |
##' be given in \code{priorSpecs} although it does not have an effect when
|
|
| 116 |
##' \code{empiricalBayes==TRUE}.
|
|
| 117 |
##' @param fixedg If this is a number, then it is taken as a fixed value of g, and as with the |
|
| 118 |
##' \code{empiricalBayes} option, the models are ranked in terms of conditional marginal
|
|
| 119 |
##' likelihood. By default, this option is \code{NULL}, which means that g is estimated in a fully
|
|
| 120 |
##' or empirical Bayesian way. |
|
| 121 |
##' @param priorSpecs prior specifications, see details |
|
| 122 |
##' @param method which method should be used to explore the posterior model space? (default: ask |
|
| 123 |
##' the user) |
|
| 124 |
##' @param subset optional subset expression |
|
| 125 |
##' @param na.action default is to skip rows with missing data, and no other option supported at the |
|
| 126 |
##' moment |
|
| 127 |
##' @param verbose should information on computation progress be given? (default) |
|
| 128 |
##' @param debug print debugging information? (not default) |
|
| 129 |
##' @param nModels how many best models should be saved? (default: 1\% of the total number of |
|
| 130 |
##' (cached) models). Must not be larger than \code{nCache} if \code{method == "sampling"}.
|
|
| 131 |
##' @param nCache maximum number of best models to be cached at the same time during the model |
|
| 132 |
##' sampling, only has effect if method = sampling |
|
| 133 |
##' @param chainlength length of the model sampling chain (only has an effect if sampling has been |
|
| 134 |
##' chosen as method) |
|
| 135 |
##' @param nGaussHermite number of quantiles used in Gauss Hermite quadrature for marginal |
|
| 136 |
##' likelihood approximation (and later in the MCMC sampler for the approximation of the marginal |
|
| 137 |
##' covariance factor density). If \code{empiricalBayes} or a fixed g is used, this option has no
|
|
| 138 |
##' effect. |
|
| 139 |
##' @param useBfgs Shall the BFGS algorithm be used in the internal maximization (not default)? |
|
| 140 |
##' Else, the default Brent optimize routine is used, which seems to be more robust. If |
|
| 141 |
##' \code{empiricalBayes} or a fixed g is used, this option has no effect and always the Brent
|
|
| 142 |
##' optimize routine is used. |
|
| 143 |
##' @param largeVariance When should the BFGS variance estimate be considered \dQuote{large}, so
|
|
| 144 |
##' that a reestimation of it is computed? (Only has an effect if \code{useBfgs == TRUE}, default:
|
|
| 145 |
##' 100) |
|
| 146 |
##' @param useOpenMP shall OpenMP be used to accelerate the computations? (default) |
|
| 147 |
##' @param higherOrderCorrection should a higher-order correction of the Laplace approximation be |
|
| 148 |
##' used, which works only for canonical GLMs? (not default) |
|
| 149 |
##' @param fixedcfactor If TRUE sets the c factor assuming alpha is set to 0. Otherwise take |
|
| 150 |
##' alpha=mean(y) |
|
| 151 |
##' @param empiricalgPrior If TRUE uses the the observed information matrix instead of X'X in the g |
|
| 152 |
##' prior. (Experimental) |
|
| 153 |
##' @param centerX Center the data before fitting (FALSE) |
|
| 154 |
##' |
|
| 155 |
##' @aliases glmBayesMfp GlmBayesMfp |
|
| 156 |
##' @return An object of S3 class \code{GlmBayesMfp}.
|
|
| 157 |
##' |
|
| 158 |
##' @keywords models regression |
|
| 159 |
##' @export |
|
| 160 |
glmBayesMfp <- function(formula = formula(data), |
|
| 161 |
censInd = NULL, |
|
| 162 |
data = parent.frame(), |
|
| 163 |
weights, |
|
| 164 |
offset, |
|
| 165 |
family, |
|
| 166 |
phi = 1, |
|
| 167 |
tbf = FALSE, |
|
| 168 |
empiricalBayes = FALSE, |
|
| 169 |
fixedg = NULL, |
|
| 170 |
priorSpecs = |
|
| 171 |
list( |
|
| 172 |
gPrior = HypergPrior(), |
|
| 173 |
modelPrior = "sparse" |
|
| 174 |
), |
|
| 175 |
method = c("ask", "exhaustive", "sampling"),
|
|
| 176 |
subset, |
|
| 177 |
na.action = na.omit, |
|
| 178 |
verbose = TRUE, |
|
| 179 |
debug = FALSE, |
|
| 180 |
nModels, |
|
| 181 |
nCache = 1e9, |
|
| 182 |
chainlength = 1e4, |
|
| 183 |
nGaussHermite = 20, |
|
| 184 |
useBfgs = FALSE, |
|
| 185 |
largeVariance = 100, |
|
| 186 |
useOpenMP = TRUE, |
|
| 187 |
higherOrderCorrection = FALSE, |
|
| 188 |
fixedcfactor = FALSE, |
|
| 189 |
empiricalgPrior = FALSE, |
|
| 190 |
centerX = TRUE) {
|
|
| 191 |
## checks |
|
| 192 | 15x |
stopifnot( |
| 193 | 15x |
is.bool(tbf), |
| 194 | 15x |
is.bool(verbose), |
| 195 | 15x |
is.bool(debug), |
| 196 | 15x |
is.bool(useBfgs), |
| 197 | 15x |
is.bool(empiricalBayes), |
| 198 | 15x |
is(priorSpecs$gPrior, "GPrior"), |
| 199 | 15x |
is.bool(useOpenMP), |
| 200 | 15x |
is.bool(higherOrderCorrection), |
| 201 | 15x |
is.bool(empiricalgPrior) |
| 202 |
) |
|
| 203 | ||
| 204 |
## see whether GLM or Cox is requested |
|
| 205 | 15x |
doGlm <- is.null(censInd) |
| 206 | 15x |
if (!doGlm) {
|
| 207 |
## and check censoring indicator vector in the Cox case. |
|
| 208 |
## Also: we only have Cox regression with TBFs! |
|
| 209 | 11x |
stopifnot( |
| 210 | 11x |
is.logical(censInd), |
| 211 | 11x |
tbf |
| 212 |
) |
|
| 213 | ||
| 214 |
## set family to Gaussian to have some pseudo values in there |
|
| 215 | 11x |
family <- gaussian |
| 216 |
} |
|
| 217 | ||
| 218 |
## see whether a fixed g is requested |
|
| 219 | 15x |
useFixedg <- !is.null(fixedg) |
| 220 | 15x |
if (useFixedg) {
|
| 221 |
## then check whether it is a valid g |
|
| 222 |
## and that there is no conflict with the empirical Bayes option |
|
| 223 | 3x |
stopifnot( |
| 224 | 3x |
is.numeric(fixedg), |
| 225 | 3x |
identical(length(fixedg), 1L), |
| 226 | 3x |
fixedg > 0, |
| 227 | 3x |
!empiricalBayes |
| 228 |
) |
|
| 229 |
} else {
|
|
| 230 | 12x |
fixedg <- 0 |
| 231 |
} |
|
| 232 | ||
| 233 |
## save call for return object |
|
| 234 | 15x |
call <- match.call() |
| 235 | 15x |
method <- match.arg(method) |
| 236 | ||
| 237 |
## check and evaluate Gauss Hermite stuff |
|
| 238 | 15x |
nGaussHermite <- as.integer(nGaussHermite) |
| 239 | 15x |
gaussHermite <- statmod::gauss.quad(n = nGaussHermite, kind = "hermite") |
| 240 | ||
| 241 |
## evaluate family, this list then also includes the dispersion |
|
| 242 | 15x |
family <- getFamily(family, phi) |
| 243 | ||
| 244 |
## get model prior choice |
|
| 245 | 15x |
priorSpecs$modelPrior <- match.arg(priorSpecs$modelPrior, |
| 246 | 15x |
choices = c("flat", "sparse", "dependent")
|
| 247 |
) |
|
| 248 | ||
| 249 |
## evaluate call for model frame building |
|
| 250 | 15x |
m <- match.call(expand.dots = FALSE) |
| 251 | ||
| 252 |
## select normal parts of the call |
|
| 253 | 15x |
temp <- c("", "formula", "data", "weights", "offset", "subset", "na.action") # "" is the function name
|
| 254 | 15x |
m <- m[match(temp, names(m), nomatch = 0)] |
| 255 | ||
| 256 |
## sort formula, so that bfp comes before uc |
|
| 257 |
## filter special parts in formula: uncertain covariates (uc) and (Bayesian) fractional polynomials (bfp) |
|
| 258 | 15x |
special <- c("uc", "bfp")
|
| 259 | ||
| 260 | 15x |
Terms <- if (missing(data)) {
|
| 261 | ! |
terms(formula, special) |
| 262 |
} else {
|
|
| 263 | 15x |
terms(formula, special, data = data) |
| 264 |
} |
|
| 265 | ||
| 266 | 15x |
tempVarNames <- rownames(attr(Terms, "factors")) |
| 267 | ||
| 268 |
## check if intercept is present |
|
| 269 | 15x |
if (!attr(Terms, "intercept")) {
|
| 270 | ! |
stop(simpleError("there must be an intercept term in the model formula"))
|
| 271 |
} |
|
| 272 | ||
| 273 | 15x |
ucTempVarNames <- sort(tempVarNames[attr(Terms, "specials")$uc]) |
| 274 | 15x |
bfpTempVarNames <- sort(tempVarNames[attr(Terms, "specials")$bfp]) |
| 275 | 15x |
fixTempVarNames <- sort(tempVarNames[-c( |
| 276 | 15x |
1, |
| 277 | 15x |
attr(Terms, "specials")$uc, |
| 278 | 15x |
attr(Terms, "specials")$bfp |
| 279 |
)]) |
|
| 280 | ||
| 281 | ||
| 282 |
## now sort the formula |
|
| 283 | 15x |
sortedFormula <- paste( |
| 284 | 15x |
deparse(Terms[[2]]), |
| 285 | 15x |
"~ 1 +", |
| 286 | 15x |
paste(c(bfpTempVarNames, ucTempVarNames, fixTempVarNames), |
| 287 | 15x |
collapse = "+" |
| 288 |
) |
|
| 289 |
) |
|
| 290 | 15x |
sortedFormula <- as.formula(sortedFormula) |
| 291 | ||
| 292 |
## filter special parts in formula: uncertain covariates (uc) and (Bayesian) fractional polynomials (bfp) |
|
| 293 | 15x |
Terms <- if (missing(data)) {
|
| 294 | ! |
terms(sortedFormula, special) |
| 295 |
} else {
|
|
| 296 | 15x |
terms(sortedFormula, special, data = data) |
| 297 |
} |
|
| 298 | ||
| 299 | 15x |
ucTermInd <- attr(Terms, "specials")$uc # special indices in original formula (beginning with 1 = response!) |
| 300 | 15x |
nUcGroups <- length(ucTermInd) |
| 301 | 15x |
bfpTermInd <- attr(Terms, "specials")$bfp |
| 302 | 15x |
nFps <- length(bfpTermInd) |
| 303 | 15x |
fixTermInd <- seq_along(tempVarNames)[-c(1, ucTermInd, bfpTermInd)] # indices of fixed terms |
| 304 | 15x |
nFixGroups <- length(fixTermInd) |
| 305 | ||
| 306 |
## check if bfp's are present |
|
| 307 | 15x |
if (nFps == 0) {
|
| 308 | 9x |
warning(simpleWarning("no fractional polynomial terms in formula"))
|
| 309 |
} |
|
| 310 | ||
| 311 |
## get vector with covariate entries |
|
| 312 | 15x |
vars <- attr(Terms, "variables") # language object |
| 313 | 15x |
varlist <- eval(vars, envir = data) # list |
| 314 | 15x |
covariates <- paste(as.list(vars)[-c(1, 2)]) # vector with covariate entries (no list or response or Intercept) |
| 315 | ||
| 316 |
## remove bfp() from entries and save the inner arguments |
|
| 317 | 15x |
bfpInner <- varlist[bfpTermInd] # saved for later use |
| 318 | 15x |
covariates[bfpTermInd - 1] <- unlist(bfpInner) # remove bfp( ) from formula; -1 because of reponse column |
| 319 | ||
| 320 |
## if ucs are present: |
|
| 321 | 15x |
if (nUcGroups) {
|
| 322 |
## remove uc() from entries and save the inner arguments |
|
| 323 | 9x |
ucInner <- unlist(varlist[ucTermInd]) |
| 324 | 9x |
covariates[ucTermInd - 1] <- ucInner |
| 325 | ||
| 326 |
## determine association of terms with uc groups |
|
| 327 | 9x |
ucTermLengths <- sapply(ucInner, function(oneUc) {
|
| 328 | 54x |
length(attr(terms(as.formula(paste("~", oneUc))), "term.labels"))
|
| 329 |
}) |
|
| 330 | 9x |
ucTermLengthsCum <- c(0, cumsum(ucTermLengths - 1)) # how much longer than 1, accumulated |
| 331 | 9x |
ucTermList <- lapply(seq(along = ucTermInd), function(i) { # list for association uc group and assign index
|
| 332 | 54x |
as.integer( |
| 333 | 54x |
ucTermInd[i] - 1 + # Starting assign index |
| 334 | 54x |
ucTermLengthsCum[i] + # add lengths from before |
| 335 | 54x |
0:(ucTermLengths[i] - 1) # range for this uc term |
| 336 |
) |
|
| 337 |
}) |
|
| 338 |
} else {
|
|
| 339 | 6x |
ucInner <- ucTermList <- NULL |
| 340 |
} |
|
| 341 |
## consistency check: |
|
| 342 | 15x |
stopifnot(identical(length(ucTermList), nUcGroups)) |
| 343 | ||
| 344 |
# if fixed variables are present |
|
| 345 | 15x |
if (nFixGroups) {
|
| 346 |
## remove uc() from entries and save the inner arguments |
|
| 347 | ! |
fixInner <- fixTempVarNames |
| 348 | ! |
covariates[fixTermInd - 1] <- fixInner |
| 349 | ||
| 350 |
## determine association of terms with fixed groups |
|
| 351 | ! |
fixTermLengths <- sapply(fixInner, function(oneFix) {
|
| 352 | ! |
length(attr(terms(as.formula(paste("~", oneFix))), "term.labels"))
|
| 353 |
}) |
|
| 354 | ! |
fixTermLengthsCum <- c(0, cumsum(fixTermLengths - 1)) # how much longer than 1, accumulated |
| 355 | ! |
fixTermList <- lapply(seq(along = fixTermInd), function(i) { # list for association uc group and assign index
|
| 356 | ! |
as.integer( |
| 357 | ! |
fixTermInd[i] - 1 + # Starting assign index |
| 358 | ! |
fixTermLengthsCum[i] + # add lengths from before |
| 359 | ! |
0:(fixTermLengths[i] - 1) # range for this uc term |
| 360 |
) |
|
| 361 |
}) |
|
| 362 |
} else {
|
|
| 363 | 15x |
fixInner <- fixTermList <- NULL |
| 364 |
} |
|
| 365 | ||
| 366 | ||
| 367 |
## check that not two entries are present for any covariate, |
|
| 368 |
## i.e. the covariate names must be unique |
|
| 369 | 15x |
if (!identical( |
| 370 | 15x |
covariates, |
| 371 | 15x |
unique(covariates) |
| 372 |
)) {
|
|
| 373 | ! |
stop(simpleError(paste( |
| 374 | ! |
"Duplicate covariates in formula:", |
| 375 | ! |
paste(covariates[duplicated(covariates)], |
| 376 | ! |
sep = ", " |
| 377 |
) |
|
| 378 |
))) |
|
| 379 |
} |
|
| 380 | ||
| 381 |
## build new formula from the cleaned covariate entries |
|
| 382 | 15x |
newFormula <- # is saved for predict method at the end |
| 383 | 15x |
update( |
| 384 | 15x |
sortedFormula, |
| 385 | 15x |
paste( |
| 386 | 15x |
".~ 1 +", |
| 387 | 15x |
paste(covariates, collapse = "+") |
| 388 | 15x |
) # only update RHS |
| 389 |
) |
|
| 390 | 15x |
newTerms <- if (missing(data)) {
|
| 391 | ! |
terms(newFormula) |
| 392 |
} else {
|
|
| 393 | 15x |
terms(newFormula, data = data) |
| 394 |
} |
|
| 395 | ||
| 396 |
## build model frame |
|
| 397 | 15x |
m$formula <- newTerms |
| 398 | 15x |
m$scale <- m$family <- m$verbose <- NULL |
| 399 | 15x |
m[[1]] <- as.name("model.frame")
|
| 400 | 15x |
m <- eval(m, sys.parent()) |
| 401 | ||
| 402 |
## build design matrix |
|
| 403 | 15x |
X <- model.matrix(newTerms, m) |
| 404 | 15x |
Xcentered <- scale(X, center = centerX, scale = FALSE) |
| 405 | ||
| 406 |
## get and check weights |
|
| 407 | 15x |
weights <- as.vector(model.weights(m)) |
| 408 | 15x |
if (is.null(weights)) {
|
| 409 | 15x |
weights <- rep(1, nrow(X)) |
| 410 |
} |
|
| 411 | ||
| 412 | 15x |
if (!is.null(weights) && !is.numeric(weights)) {
|
| 413 | ! |
stop(simpleError("'weights' must be a numeric vector"))
|
| 414 |
} |
|
| 415 | 15x |
if (!is.null(weights) && any(weights < 0)) {
|
| 416 | ! |
stop(simpleError("negative weights not allowed"))
|
| 417 |
} |
|
| 418 | ||
| 419 |
## get response |
|
| 420 | 15x |
Y <- model.response(m) |
| 421 | ||
| 422 |
## get offsets |
|
| 423 | 15x |
offset <- as.vector(model.offset(m)) |
| 424 | 15x |
if (!is.null(offset)) {
|
| 425 | ! |
if (length(offset) != NROW(Y)) {
|
| 426 | ! |
stop(gettextf( |
| 427 | ! |
"number of offsets is %d should equal %d (number of observations)", |
| 428 | ! |
length(offset), NROW(Y) |
| 429 | ! |
), domain = NA) |
| 430 |
} |
|
| 431 |
} else {
|
|
| 432 | 15x |
offset <- rep.int(0, nrow(X)) |
| 433 |
} |
|
| 434 | ||
| 435 |
## check length of censoring vector in the Cox case |
|
| 436 | 15x |
if (!doGlm) {
|
| 437 | 11x |
if (inherits(attr(m, "na.action"), "omit")) {
|
| 438 | ! |
censInd <- censInd[-attr(m, "na.action")] |
| 439 |
} |
|
| 440 | 11x |
stopifnot(identical(length(censInd), length(Y))) |
| 441 |
} |
|
| 442 | ||
| 443 |
## initialize (here e.g. the binomial matrix Y case is handled as in 'glm') |
|
| 444 | 15x |
init <- family$init(y = Y, weights = weights) |
| 445 | ||
| 446 | 15x |
Y <- init$y |
| 447 | 15x |
family$weights <- as.double(init$weights) |
| 448 | 15x |
family$offsets <- as.double(offset) |
| 449 | 15x |
family$dispersions <- as.double(family$phi / init$weights) # and zero weights ?! |
| 450 | 15x |
family$linPredStart <- as.double(init$linPredStart) |
| 451 | ||
| 452 |
## which terms gave rise to which columns? |
|
| 453 |
## (0 = intercept, 1 = first term) |
|
| 454 | 15x |
termNumbers <- attr(X, "assign") |
| 455 | ||
| 456 |
## vector of length col (X) giving uc group indices or 0 (no uc) |
|
| 457 |
## for associating uc groups with model matrix columns: ucIndices |
|
| 458 | 15x |
fixIndices <- ucIndices <- fpMaxs <- integer(length(termNumbers)) |
| 459 | ||
| 460 |
## list for mapping group -> columns in model matrix: ucColList |
|
| 461 | 15x |
if (nUcGroups) {
|
| 462 | 9x |
for (i in seq(along = ucTermList)) {
|
| 463 | 54x |
ucIndices[termNumbers %in% ucTermList[[i]]] <- i |
| 464 |
} |
|
| 465 | ||
| 466 | 9x |
ucColList <- lapply(seq(along = ucTermList), function(ucGroup) which(ucIndices == ucGroup)) |
| 467 |
} else {
|
|
| 468 | 6x |
ucColList <- NULL |
| 469 |
} |
|
| 470 | ||
| 471 |
## list for mapping group -> columns in model matrix: fixColList |
|
| 472 | 15x |
if (nFixGroups) {
|
| 473 | ! |
for (i in seq(along = fixTermList)) {
|
| 474 | ! |
fixIndices[termNumbers %in% fixTermList[[i]]] <- i |
| 475 |
} |
|
| 476 | ||
| 477 | ! |
fixColList <- lapply(seq(along = fixTermList), function(fixGroup) which(fixIndices == fixGroup)) |
| 478 |
} else {
|
|
| 479 | 15x |
fixColList <- NULL |
| 480 |
} |
|
| 481 | ||
| 482 | ||
| 483 |
## vectors of length col (X) giving maximum fp degrees or 0 (no bfp) |
|
| 484 |
## and if scaling is wanted (1) or not (0) |
|
| 485 |
## for associating bfps with model matrix columns |
|
| 486 |
## In addition, scale Columns or exit if non-positive values occur |
|
| 487 | 15x |
bfpInds <- bfpTermInd - 1 + attr(Terms, "intercept") # now indexing matrix column |
| 488 | 15x |
for (i in seq(along = bfpInner)) {
|
| 489 | 36x |
colInd <- bfpInds[i] |
| 490 | 36x |
fpObj <- bfpInner[[i]] |
| 491 | ||
| 492 | 36x |
fpMaxs[colInd] <- attr(fpObj, "max") |
| 493 | ||
| 494 |
## get scaling info |
|
| 495 | 36x |
scaleResult <- fpScale( |
| 496 | 36x |
c( |
| 497 | 36x |
attr(fpObj, "rangeVals"), # extra values not in the data |
| 498 | 36x |
X[, colInd] |
| 499 | 36x |
), # covariate data |
| 500 | 36x |
scaling = attr(fpObj, "scale") |
| 501 | 36x |
) # scaling wished? |
| 502 | 36x |
attr(bfpInner[[i]], "prescalingValues") <- scaleResult |
| 503 | ||
| 504 |
## do the scaling |
|
| 505 | 36x |
X[, colInd] <- X[, colInd] + scaleResult$shift |
| 506 | 36x |
X[, colInd] <- X[, colInd] / scaleResult$scale |
| 507 | ||
| 508 |
## check positivity |
|
| 509 | 36x |
if (min(X[, colInd]) <= 0) {
|
| 510 | ! |
stop(simpleError(paste("prescaling necessary for negative values in variable", fpObj)))
|
| 511 |
} |
|
| 512 |
} |
|
| 513 | ||
| 514 |
## check that all maximum FP degrees are equal, so that the SWITCH move |
|
| 515 |
## in the model sampling algorithm will always be possible. |
|
| 516 |
## This assumption could potentially be removed later on, or otherwise the "max" option in bfp() |
|
| 517 |
## could be removed. |
|
| 518 | 15x |
if (length(unique(fpMaxs[fpMaxs != 0])) > 1L) {
|
| 519 | ! |
stop(simpleError("all maximum FP degrees must be identical"))
|
| 520 |
} |
|
| 521 | ||
| 522 |
## Check if factor variables are used in bfp |
|
| 523 | 15x |
if (any(c("logical", "factor") %in% lapply(m, class)[bfpInds])) {
|
| 524 | ! |
stop(simpleError("logical or factor variables cannot be fractional polynomials"))
|
| 525 |
} |
|
| 526 | ||
| 527 | ||
| 528 |
## check that only the intercept (one column) is a fixed term |
|
| 529 | 15x |
if (sum(!((fpMaxs | ucIndices) | fixIndices)) > 1) {
|
| 530 | ! |
stop(simpleError("only the intercept can be a fixed term"))
|
| 531 |
} |
|
| 532 | ||
| 533 |
## attach a loglik-function, which is then called from the C++ code. |
|
| 534 |
## It gives then the loglikelihood of the mu vector of means. |
|
| 535 | 15x |
family$loglik <- function(mu) {
|
| 536 | ! |
return(-0.5 * sum(family$dev.resids(y = Y, mu = mu, wt = weights)) / phi) |
| 537 |
} |
|
| 538 |
## note that this does not include normalizing constants of |
|
| 539 |
## the sampling density, e.g. - 0.5 * log(2 * pi * phi) is *not* |
|
| 540 |
## included in the Gaussian case! |
|
| 541 | ||
| 542 |
## fit the null model |
|
| 543 | 15x |
nullModelFit <- glm(Y ~ 1, |
| 544 | 15x |
weights = family$weights, |
| 545 | 15x |
family = family$family, |
| 546 | 15x |
offset = family$offsets |
| 547 |
) |
|
| 548 | 15x |
nullModelLogMargLik <- |
| 549 | 15x |
if (tbf) {
|
| 550 |
## if we use TBF, then the log marginal likelihood is the log |
|
| 551 |
## test-based Bayes factor of the model versus the null model. Of |
|
| 552 |
## course the Bayes factor of the null model versus itself is 1, so |
|
| 553 |
## the log BF is 0. |
|
| 554 | 11x |
0 |
| 555 |
} else {
|
|
| 556 |
## compute log marginal likelihood in the null model |
|
| 557 |
## via the Laplace approximation: |
|
| 558 | 4x |
-nullModelFit$deviance / 2 + 0.5 * log(2 * pi * vcov(nullModelFit)) |
| 559 |
} |
|
| 560 | 15x |
nullModelDeviance <- nullModelFit$deviance |
| 561 | ||
| 562 |
## compute and print cardinality of the model space to guide decision |
|
| 563 | 15x |
fpSetCards <- ifelse(fpMaxs[fpMaxs != 0] <= 3, 8, 5 + fpMaxs[fpMaxs != 0]) |
| 564 | ||
| 565 |
# computes number of possible univariate fps (including omission) |
|
| 566 |
# given the maximum fp degree |
|
| 567 | 15x |
getNumberPossibleFps <- function(maxDegree) {
|
| 568 | 123x |
s <- ifelse(maxDegree <= 3, 8, 5 + maxDegree) # card. of power set |
| 569 | 123x |
singleDegreeNumbers <- sapply( |
| 570 | 123x |
seq.int(from = 0, to = maxDegree), |
| 571 | 123x |
function(m) choose(s - 1 + m, m) |
| 572 |
) |
|
| 573 | 123x |
return(sum(singleDegreeNumbers)) |
| 574 |
} |
|
| 575 | 15x |
singleNumbers <- sapply(fpMaxs, getNumberPossibleFps) |
| 576 |
# maximum number of possible models (+1 for model with only fixed covariates) |
|
| 577 | 15x |
totalNumber <- prod(singleNumbers) * 2^(nUcGroups) + 1 |
| 578 | ||
| 579 |
## process the nModels argument |
|
| 580 | 15x |
if (missing(nModels)) {
|
| 581 |
## then we would like to have the default number of models: |
|
| 582 | 7x |
nModels <- max(1L, floor(totalNumber / 100)) |
| 583 |
} |
|
| 584 |
## check nModels is at least 1 |
|
| 585 | 15x |
if (nModels < 1) {
|
| 586 | ! |
stop(simpleError("nModels must at least be 1"))
|
| 587 |
} |
|
| 588 | ||
| 589 | ||
| 590 |
## decide if we are going to do model sampling or an exhaustive search |
|
| 591 | 15x |
if (identical(method, "ask")) {
|
| 592 | ! |
cat("The cardinality of the model space is at most ", totalNumber, ".\n", sep = "")
|
| 593 | ! |
decision <- substr( |
| 594 | ! |
readline(paste( |
| 595 | ! |
"Do you want to do a deterministic search for the best model (y)", |
| 596 | ! |
"or sample from the model space (n) or abort (else) ?\n" |
| 597 |
)), |
|
| 598 | ! |
1, 1 |
| 599 |
) |
|
| 600 | ||
| 601 |
## ensure that correct decision string has been entered, otherwise abort |
|
| 602 | ! |
if (!decision %in% c("y", "n")) {
|
| 603 | ! |
cat("Aborting.\n")
|
| 604 | ! |
return() |
| 605 |
} |
|
| 606 |
} else {
|
|
| 607 | 15x |
decision <- switch(method, |
| 608 | 15x |
exhaustive = "y", |
| 609 | 15x |
sampling = "n" |
| 610 |
) |
|
| 611 |
} |
|
| 612 |
## and the match.arg before ensures that no further problems can occur. |
|
| 613 | ||
| 614 |
## translate the decision to logical variable |
|
| 615 | 15x |
doSampling <- identical(decision, "n") |
| 616 | ||
| 617 |
## ensure that we only do model sampling if there is more than 1 FP term in the model |
|
| 618 | 15x |
if (doSampling && identical(nFps, 1L)) {
|
| 619 | ! |
warning(simpleWarning(paste( |
| 620 | ! |
"We need to do an exhaustive computation of all models,", |
| 621 | ! |
"because there is only 1 FP term in the model!" |
| 622 |
))) |
|
| 623 | ! |
doSampling <- FALSE |
| 624 |
} |
|
| 625 | ||
| 626 |
## if sampling, we possibly ask for the chainlength |
|
| 627 | 15x |
if (doSampling) {
|
| 628 |
## get chainlength? |
|
| 629 | 14x |
if (identical(method, "ask")) {
|
| 630 | ! |
chainlength <- as.numeric(readline("How long do you want the Markov chain to run?\n"))
|
| 631 |
} |
|
| 632 | ||
| 633 | ||
| 634 |
## compute the default number of models to be saved |
|
| 635 | 14x |
if (is.null(nModels)) {
|
| 636 | ! |
nModels <- as.integer(max(chainlength / 100, 1L)) |
| 637 |
} else {
|
|
| 638 | 14x |
stopifnot(nModels >= 1L) |
| 639 |
} |
|
| 640 | ||
| 641 |
## check the chosen cache size |
|
| 642 | 14x |
nCache <- as.integer(nCache) |
| 643 | 14x |
stopifnot(nCache >= nModels) |
| 644 | ||
| 645 | 14x |
if (verbose) {
|
| 646 | 4x |
cat("Starting sampler...\n")
|
| 647 |
} |
|
| 648 |
} else {
|
|
| 649 | 1x |
if (verbose) {
|
| 650 | ! |
cat("Starting with computation of every model...\n")
|
| 651 |
} |
|
| 652 |
} |
|
| 653 | ||
| 654 |
## start the progress bar (is continued in the C++ code) |
|
| 655 | 15x |
if (verbose) {
|
| 656 | 4x |
cat("0%", rep("_", 100 - 6), "100%\n", sep = "")
|
| 657 |
} |
|
| 658 | ||
| 659 |
## if Cox model is requested, |
|
| 660 |
## order the data according to the survival times! |
|
| 661 | 15x |
if (!doGlm) {
|
| 662 | 11x |
sorted <- order(Y) |
| 663 | ||
| 664 | 11x |
Y <- Y[sorted] |
| 665 | 11x |
censInd <- censInd[sorted] |
| 666 | 11x |
X <- X[sorted, ] |
| 667 | 11x |
Xcentered <- Xcentered[sorted, ] |
| 668 | ||
| 669 | 11x |
if (!all(sorted == seq_along(Y))) {
|
| 670 | 11x |
warning("Input data were reordered so that the survival times are sorted")
|
| 671 |
} |
|
| 672 |
} |
|
| 673 | ||
| 674 |
## pack the data together |
|
| 675 | 15x |
data <- list( |
| 676 | 15x |
x = X, # design matrix |
| 677 | 15x |
xCentered = Xcentered, # centered design matrix |
| 678 | 15x |
y = as.double(Y), # response vector |
| 679 | 15x |
nObs = nrow(X), # number of observations |
| 680 | 15x |
censInd = as.integer(censInd) |
| 681 | 15x |
) # binary censoring indicator |
| 682 | ||
| 683 |
## pack the FP info things together |
|
| 684 | 15x |
fpInfos <- list( |
| 685 | 15x |
fpmaxs = as.integer(fpMaxs[fpMaxs != 0]), # vector of maximum fp degrees) |
| 686 | 15x |
fppos = as.integer(bfpInds), # vector of fp columns |
| 687 | 15x |
fpcards = as.integer(fpSetCards), # cardinality of corresponding power sets |
| 688 | 15x |
fpnames = if (nFps == 0) character(0) else unlist(bfpInner) |
| 689 | 15x |
) # names of fp terms. Note that |
| 690 |
# it is necessary to have this if-else |
|
| 691 |
# construction (ifelse does not work, |
|
| 692 |
# because it would take the first |
|
| 693 |
# element of the length-0 vector |
|
| 694 |
# character(0), which is NA!!) |
|
| 695 | ||
| 696 | ||
| 697 | ||
| 698 |
## pack the UC info things together |
|
| 699 | 15x |
ucInfos <- list( |
| 700 | 15x |
ucIndices = as.integer(ucIndices), # vector giving uncertainty custer indices |
| 701 |
# (column -> which group) |
|
| 702 | 15x |
ucColList = ucColList |
| 703 | 15x |
) # list for group -> which columns mapping |
| 704 | ||
| 705 | ||
| 706 |
## pack the UC info things together |
|
| 707 | 15x |
fixInfos <- list( |
| 708 | 15x |
fixIndices = as.integer(fixIndices), # vector giving fixed covaraite custer indices |
| 709 |
# (column -> which group) |
|
| 710 | 15x |
fixColList = fixColList |
| 711 | 15x |
) # list for group -> which columns mapping |
| 712 | ||
| 713 | ||
| 714 | ||
| 715 |
## pack model search configuration: |
|
| 716 | 15x |
searchConfig <- list( |
| 717 | 15x |
totalNumber = as.double(totalNumber), # cardinality of model space |
| 718 | 15x |
nModels = as.integer(nModels), # number of best |
| 719 |
# models returned |
|
| 720 | 15x |
empiricalBayes = empiricalBayes, # use EB for g and |
| 721 |
# conditional marginal likelihoods? |
|
| 722 | 15x |
useFixedg = useFixedg, # use a fixed value of g? |
| 723 | 15x |
useFixedc = fixedcfactor, |
| 724 | 15x |
doSampling = doSampling, # shall model sampling be done? If |
| 725 |
# false, then exhaustive search. |
|
| 726 | 15x |
chainlength = as.double(chainlength), # how many times should a jump be |
| 727 |
# proposed? |
|
| 728 | 15x |
nCache = nCache, # how many models to cache at the same time |
| 729 | 15x |
largeVariance = as.double(largeVariance), # what is a "large" variance output |
| 730 |
# of BFGS? |
|
| 731 | 15x |
useBfgs = useBfgs |
| 732 | 15x |
) # should we use the BFGS algorithm (or |
| 733 |
# Brent's optimize)? |
|
| 734 | ||
| 735 |
## pack prior and likelihood information: |
|
| 736 | 15x |
distribution <- list( |
| 737 | 15x |
nullModelLogMargLik = nullModelLogMargLik, # log marg lik |
| 738 |
# of the null model. |
|
| 739 | 15x |
nullModelDeviance = nullModelDeviance, # corresponding deviance |
| 740 | 15x |
doGlm = doGlm, # should we do a GLM or a Cox model search? |
| 741 | 15x |
tbf = tbf, # should TBF methodology be used to |
| 742 |
# compute Bayes factors? |
|
| 743 | 15x |
fixedg = as.double(fixedg), # the fixed value of g |
| 744 |
# (0 if not used) |
|
| 745 | 15x |
gPrior = priorSpecs$gPrior, # prior on the covariance |
| 746 |
# factor g (S4 class object) |
|
| 747 | 15x |
modelPrior = priorSpecs$modelPrior, # model prior string |
| 748 | 15x |
family = family, # GLM family and link, |
| 749 | 15x |
yMean = mean(Y), # pass the mean, which we use when fixedc=TRUE |
| 750 | 15x |
empiricalgPrior = empiricalgPrior |
| 751 | 15x |
) # should we use empirical g prior |
| 752 | ||
| 753 |
## pack other options |
|
| 754 | 15x |
options <- list( |
| 755 | 15x |
verbose = verbose, # should progress be displayed? |
| 756 | 15x |
debug = debug, # echo debug-style messages? |
| 757 | 15x |
gaussHermite = gaussHermite, # nodes and weights for Gauss |
| 758 |
# Hermite quadratures |
|
| 759 | 15x |
useOpenMP = useOpenMP, # should we use openMP for speed up? |
| 760 | 15x |
higherOrderCorrection = higherOrderCorrection |
| 761 | 15x |
) # should |
| 762 |
# the higher-order Laplace correction be used? |
|
| 763 | ||
| 764 |
## then go C++ |
|
| 765 | 15x |
Ret <- cpp_glmBayesMfp( |
| 766 | 15x |
data, |
| 767 | 15x |
fpInfos, |
| 768 | 15x |
ucInfos, |
| 769 | 15x |
fixInfos, |
| 770 | 15x |
searchConfig, |
| 771 | 15x |
distribution, |
| 772 | 15x |
options |
| 773 |
) |
|
| 774 | ||
| 775 |
## C++ attaches the following attributes: |
|
| 776 | ||
| 777 |
## numVisited |
|
| 778 |
## inclusionProbs |
|
| 779 |
## logNormConst |
|
| 780 | ||
| 781 |
## name the inclusion probabilities |
|
| 782 | 15x |
names(attr(Ret, "inclusionProbs")) <- c(unlist(bfpInner), ucInner) |
| 783 | ||
| 784 |
## name the models with the model index |
|
| 785 | 15x |
names(Ret) <- seq_along(Ret) |
| 786 | ||
| 787 |
## attach additional information: |
|
| 788 | ||
| 789 |
## information passed to C++, which is important for e.g. the function "sampleGlm" |
|
| 790 | 15x |
attr(Ret, "data") <- data |
| 791 | 15x |
attr(Ret, "fpInfos") <- fpInfos |
| 792 | 15x |
attr(Ret, "ucInfos") <- ucInfos |
| 793 | 15x |
attr(Ret, "fixInfos") <- fixInfos |
| 794 | 15x |
attr(Ret, "searchConfig") <- searchConfig |
| 795 | 15x |
attr(Ret, "distribution") <- distribution |
| 796 | 15x |
attr(Ret, "options") <- options |
| 797 | ||
| 798 |
## original call and formula |
|
| 799 | 15x |
attr(Ret, "call") <- call |
| 800 | 15x |
attr(Ret, "formula") <- newFormula |
| 801 | ||
| 802 |
## prior specs argument |
|
| 803 | 15x |
attr(Ret, "priorSpecs") <- priorSpecs |
| 804 | ||
| 805 | ||
| 806 |
## list with index info |
|
| 807 |
# fixedInds <- setdiff (1:ncol (X), c (bfpInds, which (ucIndices > 0))) |
|
| 808 | 15x |
attr(Ret, "indices") <- list( |
| 809 | 15x |
uc = ucIndices, |
| 810 | 15x |
ucList = ucColList, |
| 811 | 15x |
bfp = bfpInds, |
| 812 | 15x |
fixed = fixIndices |
| 813 |
) |
|
| 814 | ||
| 815 | ||
| 816 |
## names of the terms |
|
| 817 | 15x |
fixedNamesInds <- c(setdiff(2:length(varlist), unlist(attr(Terms, "specials"))), 1) |
| 818 | ||
| 819 |
# interceptName <- ifelse (attr (Terms, "intercept"), "(Intercept)", NULL) |
|
| 820 | 15x |
interceptName <- if (doGlm) "(Intercept)" else if (!doGlm) NULL |
| 821 | ||
| 822 | 15x |
attr(Ret, "termNames") <- list( |
| 823 | 15x |
fixed = c(interceptName, fixInner), |
| 824 | 15x |
bfp = unlist(bfpInner), |
| 825 | 15x |
uc = ucInner |
| 826 |
) |
|
| 827 | ||
| 828 |
## matrix with shift/scale info, maximum degree and cardinality of powerset |
|
| 829 | 15x |
shiftScaleMaxMat <- matrix(nrow = nFps, ncol = 4) |
| 830 | 15x |
colnames(shiftScaleMaxMat) <- c( |
| 831 | 15x |
"shift", "scale", "maxDegree", |
| 832 | 15x |
"cardPowerset" |
| 833 |
) |
|
| 834 | ||
| 835 | 15x |
if (nFps > 0L) {
|
| 836 | 6x |
shiftScaleMaxMat[, 1:2] <- matrix( |
| 837 | 6x |
unlist(lapply( |
| 838 | 6x |
bfpInner, |
| 839 | 6x |
attr, |
| 840 | 6x |
"prescalingValues" |
| 841 |
)), |
|
| 842 | 6x |
ncol = 2, |
| 843 | 6x |
byrow = TRUE |
| 844 |
) |
|
| 845 | 6x |
shiftScaleMaxMat[, 3] <- fpMaxs[fpMaxs != 0] |
| 846 | 6x |
shiftScaleMaxMat[, 4] <- fpSetCards |
| 847 | ||
| 848 | 6x |
rownames(shiftScaleMaxMat) <- unlist(bfpInner) |
| 849 |
} |
|
| 850 | ||
| 851 | 15x |
attr(Ret, "shiftScaleMax") <- shiftScaleMaxMat |
| 852 | ||
| 853 |
## set class and return |
|
| 854 | 15x |
class(Ret) <- c("GlmBayesMfp", "list")
|
| 855 | 15x |
return(Ret) |
| 856 |
} |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[getMarginalZ.R] by DSB Mon 26/08/2013 17:19 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Construct a (smooth) marginal z density approximation from a model |
|
| 9 |
## information list |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 17/02/2010 split off from sampleGlm and also export the function |
|
| 13 |
## 25/02/2010 Different strategy: now potentially all four methods are tried |
|
| 14 |
## for the approximation, and the user may specify the order. |
|
| 15 |
## The normal approximation is no longer a special case which issued |
|
| 16 |
## a warning; now it is just one approximation method among all.
|
| 17 |
## First try "spline", then "logspline", because sometimes this can remedy |
|
| 18 |
## the "too close to 0" error with "spline". |
|
| 19 |
## 12/04/2010 limit the computation time for pinv.new to prepare against |
|
| 20 |
## infinite loops in this function. |
|
| 21 |
## 15/05/2010 implement a very robust density approximation, which is just the |
|
| 22 |
## linear interpolation of the saved points. This "linear" method |
|
| 23 |
## differs from the other methods in that it does not depend on the |
|
| 24 |
## Runuran generators. |
|
| 25 |
## 17/05/2010 "linear" is now the default, because it is very robust and works |
|
| 26 |
## nicely! |
|
| 27 |
## 25/05/2010 now the "logDensVals" may contain NaN's, which we do not want to |
|
| 28 |
## include in the construction of the approximation, so we discard |
|
| 29 |
## these pairs early enough. |
|
| 30 |
##################################################################################### |
|
| 31 | ||
| 32 | ||
| 33 |
##' Construct a (smooth) marginal z density approximation from a model |
|
| 34 |
##' information list |
|
| 35 |
##' |
|
| 36 |
##' @param info the model information list |
|
| 37 |
##' @param method method for approximating the marginal density: |
|
| 38 |
##' \describe{
|
|
| 39 |
##' \item{linear}{Linearly interpolate the points.}
|
|
| 40 |
##' \item{spline}{The saved points of the unnormalized density approximation are joined
|
|
| 41 |
##' by a \dQuote{monotonic} spline. The density is smoothed out to zero at the
|
|
| 42 |
##' tails. Since the spline might be slightly negative for extreme values, the positive part |
|
| 43 |
##' is returned.} |
|
| 44 |
##' \item{logspline}{The saved points of the log unnormalized density approximation are joined
|
|
| 45 |
##' by a \dQuote{monotonic} spline, which is then exponentiated.}
|
|
| 46 |
##' \item{normalspline}{A \dQuote{monotonic} spline is fitted to the differences of the saved
|
|
| 47 |
##' log density values and the log normal approximation. The resulting spline function is |
|
| 48 |
##' exponentiated and then multiplied with the normal density.} |
|
| 49 |
##' \item{normal}{Just take the normal approximation.}
|
|
| 50 |
##' This may also be a vector with more than one method name, to modify the preference
|
| 51 |
##' sequence: If the first method does not work, the second is tried and so on. The normal |
|
| 52 |
##' approximation always \dQuote{works} (but may give bad results).
|
|
| 53 |
##' } |
|
| 54 |
##' @param verbose Echo the chosen method? (not default) |
|
| 55 |
##' @param plot produce plots of the different approximation steps? (not default) |
|
| 56 |
##' @return a list with the log of the normalized density approximation (\dQuote{logDens}) and
|
|
| 57 |
##' the random number generator (\dQuote{gen}).
|
|
| 58 |
##' |
|
| 59 |
##' @export |
|
| 60 |
##' @keywords internal |
|
| 61 |
getMarginalZ <- function(info,
                         method =
                             c(
                                 "linear", "spline", "logspline",
                                 "normalspline", "normal"
                             ),
                         verbose = FALSE,
                         plot = FALSE) {
    ## Purpose: build a (smooth) normalized approximation of the marginal z
    ## density from the saved evaluation points in the model information list
    ## "info", together with a random number generator for that density.
    ## Returns list(logDens = <log normalized density fn>, gen = <generator fn>).

    ## get the (ordered) z abscissae at which the unnormalized
    ## negative log density was saved
    zVals <- info$negLogUnnormZDensities$args
    zOrder <- order(zVals)
    zVals <- zVals[zOrder]

    ## the range of z
    zMin <- zVals[1L]
    zMax <- zVals[length(zVals)]

    ## get (roughly normalized, via the log marginal likelihood)
    ## and ordered log density values
    logDensVals <- with(
        info,
        -negLogUnnormZDensities$vals - logMargLik
    )
    logDensVals <- logDensVals[zOrder]

    ## remove pairs with NaN log density values, so they do not enter
    ## the construction of the approximation
    isNaN <- is.nan(logDensVals)

    logDensVals <- logDensVals[!isNaN]
    zVals <- zVals[!isNaN]

    ## optionally plot that
    if (plot) {
        par(mfrow = c(2, 2))
        plot(zVals,
            exp(logDensVals),
            type = "o",
            main = "roughly normalized original values"
        )
        abline(v = info$zMode)
    }

    ## the approximating normal density (moments taken from "info"):
    normalDens <- function(z, log = FALSE) {
        dnorm(
            x = z,
            mean = info$zMode,
            sd = sqrt(info$zVar),
            log = log
        )
    }

    ## show it if wished
    if (plot) {
        curve(normalDens,
            from = zMin, to = zMax,
            main = "normal approximation",
            n = 301L
        )
        abline(v = info$zMode)
    }

    ## decide on the range of the spline function: extend the observed
    ## z range by 20% on each side
    constant <- 0.2 * (zMax - zMin)
    extendedZVals <- c(
        zMin - constant,
        zVals,
        zMax + constant
    )

    ## add zeroes to both ends of the density values
    ## to smooth out the approximation to zero.
    densVals <- c(
        0,
        exp(logDensVals),
        0
    )

    ## grid where the approximation is checked for finiteness
    extendedZGrid <- seq(
        from = min(extendedZVals),
        to = max(extendedZVals),
        length = 401L
    )

    ## what are the possible methods for computing the unnormalized z density?
    ## (an expression() so only the selected method is actually evaluated)
    possibleMethods <-
        expression(
            ## --------------------
            spline = {
                ## now compute the spline
                cubicSplineFun <- splinefun(
                    x = extendedZVals,
                    y = densVals,
                    method = "monoH.FC"
                )

                ## so the unnormalized z density approximation is
                function(z, log = FALSE) {
                    ## set everything outside the range to zero
                    ## (unfortunately this is not an option for "splinefun")
                    ret <- ifelse(z > min(extendedZVals) & z < max(extendedZVals),
                        cubicSplineFun(z),
                        0
                    )

                    ## take the positive part as the spline may be slightly
                    ## negative at the ends
                    ret <- pmax(ret, 0)

                    ## exponentiate them?
                    if (log) {
                        return(log(ret))
                    } else {
                        return(ret)
                    }
                }
            },
            ## --------------------
            linear = {
                ## now compute the linear interpolant; rule = 2 returns the
                ## boundary values outside the range, which are 0 here
                linearSplineFun <- approxfun(
                    x = extendedZVals,
                    y = densVals,
                    method = "linear",
                    rule = 2
                )

                ## so the unnormalized z density approximation is
                function(z, log = FALSE) {
                    ret <- linearSplineFun(z)

                    ## exponentiate them?
                    if (log) {
                        return(log(ret))
                    } else {
                        return(ret)
                    }
                }
            },
            ## --------------------
            logspline = {
                ## add very small values to both ends of the log density values
                ## to smooth out the spline
                logZero <- min(logDensVals) - 1e5
                extendedLogDensVals <- c(
                    logZero,
                    logDensVals,
                    logZero
                )

                ## now compute the spline
                cubicSplineFun <- splinefun(
                    x = extendedZVals,
                    y = extendedLogDensVals,
                    method = "monoH.FC"
                )

                ## so the unnormalized z density approximation is
                function(z, log = FALSE) {
                    ## set everything outside the range to (log) zero
                    ## (unfortunately this is not an option for "splinefun")
                    logRet <- ifelse(z > min(extendedZVals) & z < max(extendedZVals),
                        cubicSplineFun(z),
                        logZero
                    )

                    ## exponentiate them?
                    if (log) {
                        return(logRet)
                    } else {
                        return(exp(logRet))
                    }
                }
            },
            ## --------------------
            normalspline = {
                ## now fit the cubic spline to the differences of the saved log
                ## density values and the log normal approximation. In order to
                ## get smooth decrease to zero, we add zeroes to the log
                ## difference vector at both ends
                logDiffs <- c(
                    0,
                    logDensVals - normalDens(zVals, log = TRUE),
                    0
                )

                cubicSplineFun <- splinefun(
                    x = extendedZVals,
                    y = logDiffs,
                    method = "monoH.FC"
                )

                ## optionally plot the spline of the log differences
                if (plot) {
                    curve(cubicSplineFun,
                        from = min(extendedZVals), to = max(extendedZVals),
                        n = 301L,
                        main = "spline of log differences"
                    )
                    points(
                        x = extendedZVals,
                        y = logDiffs
                    )
                    abline(v = info$zMode)
                }

                ## so the unnormalized z density approximation is
                function(z, log = FALSE) {
                    ## set everything outside the range to zero
                    ## (unfortunately this is not an option for "splinefun")
                    splineVals <- ifelse(z > min(extendedZVals) & z < max(extendedZVals),
                        cubicSplineFun(z),
                        0
                    )

                    ## the log values: spline correction plus log normal density
                    logRet <- splineVals + normalDens(z,
                        log = TRUE
                    )

                    ## exponentiate them?
                    if (log) {
                        return(logRet)
                    } else {
                        return(exp(logRet))
                    }
                }
            },

            ## --------------------
            normal = {
                normalDens
            }
        )

    ## decide which method(s) to try first
    firstMethods <- match.arg(method, several.ok = TRUE)

    ## so what is the sequence this time? (user's preference first,
    ## then the remaining methods in default order)
    methodsSequence <- c(
        firstMethods,
        setdiff(
            c("linear", "spline", "logspline", "normalspline", "normal"),
            firstMethods
        )
    )

    ## then try the possible methods in this order
    for (m in methodsSequence) {
        ## get the unnormalized pdf
        unnormZdens <- eval(possibleMethods[[m]])

        ## first check that no infinite values occur!
        if (any(!is.finite(unnormZdens(extendedZGrid)))) {
            ## note: fixed the message, which previously read
            ## "...approximationgoing to the next method" (missing separator)
            warning(
                "Infinite values for method ", m,
                " in density approximation, ",
                "going to the next method"
            )

            ## if any is not finite, then go on to the next method
            next
        }

        ## then try to get the generator for that pdf
        generator <- try(getGenerator(
            method = m,
            unnormDensFun = unnormZdens,
            xVals = extendedZVals,
            unnormDensVals = densVals
        ))

        ## if this resulted in an error (computational or time-out),
        ## go on to the next method,
        ## else break
        if (!inherits(generator, "try-error")) {
            if (verbose) {
                cat("\nTaking the", m, "approximation method\n")
            }
            break
        }
    }

    ## so the normalized log density is:
    logNormZdens <- function(z) {
        unnormZdens(z, log = TRUE) - log(generator$normConst)
    }

    ## optionally plot the final normalized approximation
    if (plot) {
        plot(extendedZGrid,
            exp(logNormZdens(extendedZGrid)),
            type = "l",
            main = "final approximation"
        )
        abline(v = info$zMode)
    }

    ## return the list with the log density and the corresponding
    ## random number generator
    return(list(
        logDens = logNormZdens,
        gen = generator$generator
    ))
}
|
| 365 | ||
| 366 | ||
| 367 |
##' Internal helper function which gets the generator (and normalizing |
|
| 368 |
##' constant) |
|
| 369 |
##' |
|
| 370 |
##' @param method the methods string |
|
| 371 |
##' @param unnormDensFun the unnormalized density function |
|
| 372 |
##' @param xVals the x values |
|
| 373 |
##' @param unnormDensVals the unnormalized density values |
|
| 374 |
##' @return a list with the elements \dQuote{generator} and \dQuote{normConst},
|
|
| 375 |
##' containing the generator function (with argument \code{n} for the number
|
|
| 376 |
##' of samples) and the normalizing constant of the density, respectively. |
|
| 377 |
##' |
|
| 378 |
##' @keywords internal |
|
| 379 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 380 |
getGenerator <- function(method, |
|
| 381 |
unnormDensFun, |
|
| 382 |
xVals, |
|
| 383 |
unnormDensVals) {
|
|
| 384 | 38x |
if (method == "linear") {
|
| 385 |
## we can build the generator ourselves. |
|
| 386 | ||
| 387 |
## handy abbreviations |
|
| 388 | 38x |
nPoints <- length(xVals) |
| 389 | 38x |
z <- xVals |
| 390 | 38x |
y <- unnormDensVals |
| 391 | ||
| 392 |
## first calculate the cdf points |
|
| 393 | 38x |
cdf <- numeric(nPoints) |
| 394 | 38x |
cdf[1L] <- 0 |
| 395 | 38x |
for (i in 2:nPoints) {
|
| 396 | 4931x |
cdf[i] <- cdf[i - 1] + |
| 397 | 4931x |
(y[i] + y[i - 1]) / 2 * (z[i] - z[i - 1]) |
| 398 |
} |
|
| 399 | ||
| 400 |
## normalize |
|
| 401 | 38x |
normConst <- cdf[nPoints] |
| 402 | 38x |
cdf <- cdf / normConst |
| 403 | 38x |
y <- y / normConst |
| 404 | ||
| 405 |
## and now the corresponding inverse cdf (quantile function) |
|
| 406 | 38x |
quantFun <- function(p) {
|
| 407 |
## check argument |
|
| 408 | 61250x |
stopifnot( |
| 409 | 61250x |
0 <= p, |
| 410 | 61250x |
1 >= p |
| 411 |
) |
|
| 412 | ||
| 413 |
## check the boundary cases |
|
| 414 | 61250x |
if (p == 0) {
|
| 415 | ! |
return(z[1]) |
| 416 |
} |
|
| 417 | 61250x |
if (p == 1) {
|
| 418 | ! |
return(z[nPoints]) |
| 419 |
} |
|
| 420 | ||
| 421 |
## so now we are inside. |
|
| 422 | ||
| 423 |
## find the right interval |
|
| 424 | 61250x |
i <- 2L # we can start at 2 |
| 425 | 61250x |
while (p > cdf[i]) {
|
| 426 | 4303281x |
i <- i + 1L |
| 427 |
} |
|
| 428 |
## so now cdf[i - 1] < p <= cdf[i] |
|
| 429 | ||
| 430 | 61250x |
slope <- (y[i] - y[i - 1L]) / (z[i] - z[i - 1L]) |
| 431 | 61250x |
intercept <- y[i - 1L] |
| 432 | 61250x |
const <- cdf[i - 1] - p |
| 433 | ||
| 434 | 61250x |
solution <- (sqrt(intercept^2 - 2 * slope * const) - intercept) / slope |
| 435 | ||
| 436 | 61250x |
return(solution + z[i - 1L]) |
| 437 |
} |
|
| 438 | ||
| 439 |
## vectorize that |
|
| 440 | 38x |
quantFunVec <- Vectorize(quantFun) |
| 441 | ||
| 442 |
## so our generator function is |
|
| 443 | 38x |
gen <- function(n = 1) {
|
| 444 | 61250x |
return(quantFunVec(runif(n = n))) |
| 445 |
} |
|
| 446 | ||
| 447 |
## so we return this list: |
|
| 448 | 38x |
return(list( |
| 449 | 38x |
generator = gen, |
| 450 | 38x |
normConst = normConst |
| 451 |
)) |
|
| 452 |
} else {
|
|
| 453 |
## some Runuran generator is built. |
|
| 454 | ||
| 455 |
## the creation is somewhat slow, |
|
| 456 |
## but the generation from it is very fast. |
|
| 457 | ||
| 458 |
## However, sometimes pinv.new seems to get stuck in an |
|
| 459 |
## infinite loop. Therefore we limit the computation time |
|
| 460 |
## for this call (only). |
|
| 461 | ! |
setTimeLimit(cpu = 40) # set time limit |
| 462 | ! |
on.exit(setTimeLimit(cpu = Inf)) # delete time limit after exiting this |
| 463 |
# function. (either after normal exit or |
|
| 464 |
# after an error) |
|
| 465 | ||
| 466 | ! |
unuranObject <- pinv.new( |
| 467 | ! |
pdf = unnormDensFun, |
| 468 | ! |
lb = min(xVals), |
| 469 | ! |
ub = max(xVals), |
| 470 | ! |
center = xVals[which.max(unnormDensVals)] |
| 471 |
) |
|
| 472 | ||
| 473 |
## get the normalizing constant |
|
| 474 |
## (this is only possible *before* the object is packed!) |
|
| 475 | ! |
normConst <- unuran.details(unuranObject, show = FALSE, return.list = TRUE)$area.pdf |
| 476 | ||
| 477 |
## "pack" the object so that it can be saved just as a normal R object. |
|
| 478 | ! |
unuran.packed(unuranObject) <- TRUE |
| 479 | ||
| 480 |
## this function returns random variates from the pdf, where n is the number of variates to be |
|
| 481 |
## produced |
|
| 482 | ! |
gen <- function(n = 1) {
|
| 483 | ! |
return(ur(unuranObject, n = n)) |
| 484 |
} |
|
| 485 | ||
| 486 |
## so we return this list: |
|
| 487 | ! |
return(list( |
| 488 | ! |
generator = gen, |
| 489 | ! |
normConst = normConst |
| 490 |
)) |
|
| 491 |
} |
|
| 492 |
} |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs. |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[GPrior-classes.R] by DSB Mon 26/08/2013 11:34 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Hopefully a clean class system for the different priors on g. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 15/03/2010 file creation in response to the redesign of the C++ side. |
|
| 12 |
## 23/07/2010 we now only warn instead of producing an error when the integral |
|
| 13 |
## is not equal to 1. |
|
| 14 |
## 22/11/2012 add incomplete inverse gamma prior class |
|
| 15 |
## 26/08/2013 link IncInvGammaGPrior from the virtual g-prior class |
|
| 16 |
##################################################################################### |
|
| 17 | ||
| 18 |
## ---------------------------------------------------------------------------- |
|
| 19 | ||
| 20 |
##' The virtual g-prior class |
|
| 21 |
##' |
|
| 22 |
##' This is the virtual g-prior class from which other g-prior classes derive. |
|
| 23 |
##' The slots are: |
|
| 24 |
##' \describe{
|
|
| 25 |
##' \item{logDens}{the prior log density}
|
|
| 26 |
##' } |
|
| 27 |
##' |
|
| 28 |
##' @seealso \code{\linkS4class{HypergPrior}}, \code{\linkS4class{InvGammaGPrior}},
|
|
| 29 |
##' \code{\linkS4class{IncInvGammaGPrior}}, \code{\linkS4class{CustomGPrior}}
|
|
| 30 |
##' |
|
| 31 |
##' @name GPrior-class |
|
| 32 |
##' @keywords classes internal |
|
| 33 |
setClass( |
|
| 34 |
Class = "GPrior", |
|
| 35 |
representation = |
|
| 36 |
representation(logDens = "function"), |
|
| 37 |
contains = list("VIRTUAL"),
|
|
| 38 |
validity = |
|
| 39 |
function(object) {
|
|
| 40 |
## check that the exp of the log density function |
|
| 41 |
## is a valid density function |
|
| 42 | ||
| 43 |
XMIN <- .Machine$double.xmin |
|
| 44 |
EPS <- sqrt(.Machine$double.eps) |
|
| 45 | ||
| 46 |
integrand <- function(g) exp(object@logDens(g)) |
|
| 47 |
integral <- integrate( |
|
| 48 |
f = integrand, |
|
| 49 |
lower = XMIN, |
|
| 50 |
upper = Inf |
|
| 51 |
) |
|
| 52 | ||
| 53 |
if (integral$message != "OK") {
|
|
| 54 |
return(integral$message) |
|
| 55 |
} else {
|
|
| 56 |
if (abs(integral$value - 1) > EPS) {
|
|
| 57 |
warning( |
|
| 58 |
"density must be proper and normalized: (numerical) integral is ", |
|
| 59 |
integral$value |
|
| 60 |
) |
|
| 61 |
} |
|
| 62 | ||
| 63 |
return(TRUE) |
|
| 64 |
} |
|
| 65 |
} |
|
| 66 |
) |
|
| 67 | ||
| 68 |
## ---------------------------------------------------------------------------- |
|
| 69 | ||
| 70 |
##' The hyper-g prior class |
|
| 71 |
##' |
|
| 72 |
##' The slots are: |
|
| 73 |
##' \describe{
|
|
| 74 |
##' \item{a}{the hyperparameter}
|
|
| 75 |
##' } |
|
| 76 |
##' |
|
| 77 |
##' @seealso the constructor \code{\link{HypergPrior}}
|
|
| 78 |
##' |
|
| 79 |
##' @name HypergPrior-class |
|
| 80 |
##' @keywords classes |
|
| 81 |
##' @export |
|
| 82 |
setClass( |
|
| 83 |
Class = "HypergPrior", |
|
| 84 |
representation = |
|
| 85 |
representation(a = "numeric"), |
|
| 86 |
contains = list("GPrior"),
|
|
| 87 |
validity = |
|
| 88 |
function(object) {
|
|
| 89 |
if (object@a <= 3) {
|
|
| 90 |
return("the parameter a must be larger than 3 for proper posteriors")
|
|
| 91 |
} else {
|
|
| 92 |
return(TRUE) |
|
| 93 |
} |
|
| 94 |
} |
|
| 95 |
) |
|
| 96 | ||
| 97 | ||
| 98 |
##' Initialization method for the "HypergPrior" class |
|
| 99 |
##' |
|
| 100 |
##' @usage \S4method{initialize}{HypergPrior}(.Object, a, \dots)
|
|
| 101 |
##' @param .Object the \code{\linkS4class{HypergPrior}} we want to initialize
|
|
| 102 |
##' @param a the hyperparameter value |
|
| 103 |
##' @param \dots unused |
|
| 104 |
##' @return the initialized object |
|
| 105 |
##' |
|
| 106 |
##' @name HypergPrior-initialize |
|
| 107 |
##' @aliases HypergPrior-initialize initialize,HypergPrior-method |
|
| 108 |
##' |
|
| 109 |
##' @keywords methods internal |
|
| 110 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 111 |
##' |
|
| 112 |
##' @importFrom methods callNextMethod |
|
| 113 |
##' |
|
| 114 |
setMethod( |
|
| 115 |
"initialize", |
|
| 116 |
signature(.Object = "HypergPrior"), |
|
| 117 |
function(.Object, a, ...) {
|
|
| 118 | ! |
.Object@logDens <- function(g) {
|
| 119 | ! |
return(log(a - 2) - log(2) - a / 2 * log1p(g)) |
| 120 |
} |
|
| 121 | ! |
callNextMethod(.Object, a = a, ...) |
| 122 |
} |
|
| 123 |
) |
|
| 124 | ||
| 125 | ||
| 126 |
##' Constructor for the hyper-g prior class |
|
| 127 |
##' |
|
| 128 |
##' @param a the hyperparameter which must be larger than 3, and should not be larger than 4 |
|
| 129 |
##' in order not to favor too much shrinkage a priori (default: 4) |
|
| 130 |
##' @return a new \code{\linkS4class{HypergPrior}} object
|
|
| 131 |
##' |
|
| 132 |
##' @keywords classes |
|
| 133 |
##' @export |
|
| 134 |
HypergPrior <- function(a = 4) {
|
|
| 135 | ! |
return(new("HypergPrior",
|
| 136 | ! |
a = a |
| 137 |
)) |
|
| 138 |
} |
|
| 139 | ||
| 140 | ||
| 141 |
## ---------------------------------------------------------------------------- |
|
| 142 | ||
| 143 |
##' The inverse gamma g-prior class |
|
| 144 |
##' |
|
| 145 |
##' The slots are: |
|
| 146 |
##' \describe{
|
|
| 147 |
##' \item{a}{the first hyperparameter}
|
|
| 148 |
##' \item{b}{the second hyperparameter}
|
|
| 149 |
##' } |
|
| 150 |
##' |
|
| 151 |
##' @seealso the constructor \code{\link{InvGammaGPrior}}
|
|
| 152 |
##' |
|
| 153 |
##' @name InvGammaGPrior-class |
|
| 154 |
##' @keywords classes |
|
| 155 |
##' @export |
|
| 156 |
setClass( |
|
| 157 |
Class = "InvGammaGPrior", |
|
| 158 |
representation = |
|
| 159 |
representation( |
|
| 160 |
a = "numeric", |
|
| 161 |
b = "numeric" |
|
| 162 |
), |
|
| 163 |
contains = list("GPrior"),
|
|
| 164 |
validity = |
|
| 165 |
function(object) {
|
|
| 166 |
if ((object@a <= 0) || (object@b <= 0)) {
|
|
| 167 |
return("the parameters a and b must be positive")
|
|
| 168 |
} else {
|
|
| 169 |
return(TRUE) |
|
| 170 |
} |
|
| 171 |
} |
|
| 172 |
) |
|
| 173 | ||
| 174 |
##' Initialization method for the "InvGammaGPrior" class |
|
| 175 |
##' |
|
| 176 |
##' @usage \S4method{initialize}{InvGammaGPrior}(.Object, a, b, \dots)
|
|
| 177 |
##' @param .Object the \code{\linkS4class{InvGammaGPrior}} we want to initialize
|
|
| 178 |
##' @param a the first hyperparameter value |
|
| 179 |
##' @param b the second hyperparameter value |
|
| 180 |
##' @param \dots unused |
|
| 181 |
##' @return the initialized object |
|
| 182 |
##' |
|
| 183 |
##' @name InvGammaGPrior-initialize |
|
| 184 |
##' @aliases InvGammaGPrior-initialize initialize,InvGammaGPrior-method |
|
| 185 |
##' |
|
| 186 |
##' @keywords methods internal |
|
| 187 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 188 |
setMethod( |
|
| 189 |
"initialize", |
|
| 190 |
signature(.Object = "InvGammaGPrior"), |
|
| 191 |
function(.Object, a, b, ...) {
|
|
| 192 | 4x |
.Object@logDens <- function(g) {
|
| 193 | 76x |
return(-(a + 1) * log(g) - b / g + a * log(b) - lgamma(a)) |
| 194 |
} |
|
| 195 | 4x |
callNextMethod(.Object, a = a, b = b, ...) |
| 196 |
} |
|
| 197 |
) |
|
| 198 | ||
| 199 |
##' Constructor for the inverse gamma g-prior class |
|
| 200 |
##' |
|
| 201 |
##' @param a the first positive hyperparameter (default: 0.001) |
|
| 202 |
##' @param b the second positive hyperparameter (default: 0.001) |
|
| 203 |
##' @return a new \code{\linkS4class{InvGammaGPrior}} object
|
|
| 204 |
##' |
|
| 205 |
##' @keywords classes |
|
| 206 |
##' @export |
|
| 207 |
InvGammaGPrior <- function(a = 0.001, b = 0.001) {
|
|
| 208 | 4x |
return(new("InvGammaGPrior",
|
| 209 | 4x |
a = a, |
| 210 | 4x |
b = b |
| 211 |
)) |
|
| 212 |
} |
|
| 213 | ||
| 214 | ||
| 215 |
## ---------------------------------------------------------------------------- |
|
| 216 | ||
| 217 |
##' The incomplete inverse gamma g-prior class |
|
| 218 |
##' |
|
| 219 |
##' The slots are: |
|
| 220 |
##' \describe{
|
|
| 221 |
##' \item{a}{the first hyperparameter}
|
|
| 222 |
##' \item{b}{the second hyperparameter}
|
|
| 223 |
##' } |
|
| 224 |
##' |
|
| 225 |
##' @seealso the constructor \code{\link{IncInvGammaGPrior}}
|
|
| 226 |
##' |
|
| 227 |
##' @name IncInvGammaGPrior-class |
|
| 228 |
##' @keywords classes |
|
| 229 |
##' @export |
|
| 230 |
setClass( |
|
| 231 |
Class = "IncInvGammaGPrior", |
|
| 232 |
representation = |
|
| 233 |
representation( |
|
| 234 |
a = "numeric", |
|
| 235 |
b = "numeric" |
|
| 236 |
), |
|
| 237 |
contains = list("GPrior"),
|
|
| 238 |
validity = |
|
| 239 |
function(object) {
|
|
| 240 |
if (object@a <= 0) {
|
|
| 241 |
return("the parameter a must be positive")
|
|
| 242 |
} else if (object@b < 0) {
|
|
| 243 |
return("the parameter b must be non-negative")
|
|
| 244 |
} else {
|
|
| 245 |
return(TRUE) |
|
| 246 |
} |
|
| 247 |
} |
|
| 248 |
) |
|
| 249 | ||
| 250 |
##' Initialization method for the "IncInvGammaGPrior" class |
|
| 251 |
##' |
|
| 252 |
##' @usage \S4method{initialize}{IncInvGammaGPrior}(.Object, a, b, \dots)
|
|
| 253 |
##' @param .Object the \code{\linkS4class{IncInvGammaGPrior}} we want to initialize
|
|
| 254 |
##' @param a the first hyperparameter value |
|
| 255 |
##' @param b the second hyperparameter value |
|
| 256 |
##' @param \dots unused |
|
| 257 |
##' @return the initialized object |
|
| 258 |
##' |
|
| 259 |
##' @name IncInvGammaGPrior-initialize |
|
| 260 |
##' @aliases IncInvGammaGPrior-initialize initialize,IncInvGammaGPrior-method |
|
| 261 |
##' |
|
| 262 |
##' @keywords methods internal |
|
| 263 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 264 |
setMethod( |
|
| 265 |
"initialize", |
|
| 266 |
signature(.Object = "IncInvGammaGPrior"), |
|
| 267 |
function(.Object, a, b, ...) {
|
|
| 268 | ! |
.Object@logDens <- function(g) {
|
| 269 | ! |
normConst <- |
| 270 | ! |
if (b > 0) {
|
| 271 | ! |
a * log(b) - pgamma(b, a, log.p = TRUE) - lgamma(a) |
| 272 |
} else {
|
|
| 273 | ! |
log(a) |
| 274 |
} |
|
| 275 | ||
| 276 | ! |
return(-(a + 1) * log1p(g) - b / (g + 1) + normConst) |
| 277 |
} |
|
| 278 | ! |
callNextMethod(.Object, a = a, b = b, ...) |
| 279 |
} |
|
| 280 |
) |
|
| 281 | ||
| 282 |
##' Constructor for the incomplete inverse gamma g-prior class |
|
| 283 |
##' |
|
| 284 |
##' @param a the first positive hyperparameter (default: 0.001) |
|
| 285 |
##' @param b the second positive hyperparameter (default: 0.001) |
|
| 286 |
##' @return a new \code{\linkS4class{IncInvGammaGPrior}} object
|
|
| 287 |
##' |
|
| 288 |
##' @keywords classes |
|
| 289 |
##' @export |
|
| 290 |
IncInvGammaGPrior <- function(a = 0.001, b = 0.001) {
|
|
| 291 | ! |
return(new("IncInvGammaGPrior",
|
| 292 | ! |
a = a, |
| 293 | ! |
b = b |
| 294 |
)) |
|
| 295 |
} |
|
| 296 | ||
| 297 | ||
| 298 |
## ---------------------------------------------------------------------------- |
|
| 299 | ||
| 300 |
##' The custom g-prior class |
|
| 301 |
##' |
|
| 302 |
##' This class wraps around a custom log prior density for the covariance factor g. |
|
| 303 |
##' |
|
| 304 |
##' @seealso the constructor \code{\link{CustomGPrior}}
|
|
| 305 |
##' |
|
| 306 |
##' @name CustomGPrior-class |
|
| 307 |
##' @keywords classes |
|
| 308 |
##' @export |
|
| 309 |
setClass( |
|
| 310 |
Class = "CustomGPrior", |
|
| 311 |
contains = list("GPrior")
|
|
| 312 |
) |
|
| 313 | ||
| 314 | ||
| 315 |
##' Constructor for the custom g-prior class |
|
| 316 |
##' |
|
| 317 |
##' @param logDens the log prior density function for g |
|
| 318 |
##' @return a new \code{\linkS4class{CustomGPrior}} object
|
|
| 319 |
##' |
|
| 320 |
##' @keywords classes |
|
| 321 |
##' @export |
|
| 322 |
CustomGPrior <- function(logDens) {
|
|
| 323 | 10x |
return(new("CustomGPrior",
|
| 324 | 10x |
logDens = logDens |
| 325 |
)) |
|
| 326 |
} |
|
| 327 | ||
| 328 |
## ---------------------------------------------------------------------------- |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[getLogGPrior.R] by DSB Mit 17/02/2010 15:55 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Helper for glmBayesMfp which returns the normalized log g prior density. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 12/02/2010 split from glmBayesMfp.R file |
|
| 12 |
## 17/02/2010 reduce the lower bound for the integration to the smallest |
|
| 13 |
## value greater than zero instead of the machine epsilon, and compare the |
|
| 14 |
## difference to 1 with machine epsilon instead of the absolute error |
|
| 15 |
## (this is necessary e.g. for IG(0.001, 0.001)) |
|
| 16 |
##################################################################################### |
|
| 17 | ||
| 18 |
##' Helper function for glmBayesMfp: Returns the normalized log g prior density |
|
| 19 |
##' |
|
| 20 |
##' Returns the normalized log of the prior density on g as an R function. |
|
| 21 |
##' It is checked that the result is valid, that is that exp(f) integrates to 1 over the |
|
| 22 |
##' positive real line. |
|
| 23 |
##' |
|
| 24 |
##' @param gPrior the gPrior argument passed to \code{\link{glmBayesMfp}}
|
|
| 25 |
##' |
|
| 26 |
##' @return The R function. |
|
| 27 |
##' |
|
| 28 |
##' @keywords internal |
|
| 29 |
getLogGPrior <- function(gPrior) {
|
|
| 30 |
## get g prior choice |
|
| 31 | ! |
if (isTRUE(is.character(gPrior))) {
|
| 32 |
## which of the choices has been selected by the user? |
|
| 33 | ! |
gPrior <- match.arg(gPrior, |
| 34 | ! |
choices = ("hyperg")
|
| 35 |
) |
|
| 36 | ||
| 37 |
## hyper-g prior with a = 4 |
|
| 38 | ! |
gPrior <- |
| 39 | ! |
if (gPrior == "hyperg") {
|
| 40 | ! |
function(g) -2 * log1p(g) |
| 41 |
} |
|
| 42 |
} |
|
| 43 | ||
| 44 |
## check that the exp of the function it is a valid density function |
|
| 45 | ! |
if (isTRUE(is.function(gPrior))) {
|
| 46 | ! |
XMIN <- .Machine$double.xmin |
| 47 | ! |
EPS <- sqrt(.Machine$double.eps) |
| 48 | ||
| 49 | ! |
integrand <- function(g) exp(gPrior(g)) |
| 50 | ||
| 51 | ! |
integral <- integrate( |
| 52 | ! |
f = integrand, |
| 53 | ! |
lower = XMIN, |
| 54 | ! |
upper = Inf |
| 55 |
) |
|
| 56 | ||
| 57 | ! |
if (integral$message != "OK") {
|
| 58 | ! |
stop(simpleError(integral$message)) |
| 59 |
} |
|
| 60 | ! |
if (abs(integral$value - 1) > EPS) {
|
| 61 | ! |
stop(simpleError("the g-prior density must be proper and normalized"))
|
| 62 |
} |
|
| 63 |
} else {
|
|
| 64 | ! |
stop(simpleError("the g-prior argument must either be a valid string or a (vectorized) density function"))
|
| 65 |
} |
|
| 66 | ||
| 67 |
## return the function |
|
| 68 | ! |
return(gPrior) |
| 69 |
} |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[GlmBayesMfp-methods.R] by DSB Fre 07/12/2012 11:10 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Additional convenience methods for GlmBayesMfp class objects. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 12/02/2010 modify from bfp package |
|
| 12 |
## 15/03/2010 adapt print.GlmBayesMfp to new S4 gPrior element |
|
| 13 |
## 07/12/2012 change default for freq in as.data.frame.GlmBayesMfp |
|
| 14 |
##################################################################################### |
|
| 15 | ||
| 16 |
##' @include posteriors.R |
|
| 17 |
NULL |
|
| 18 | ||
| 19 |
##' Extract method for GlmBayesMfp objects |
|
| 20 |
##' |
|
| 21 |
##' Extract a subset of models from a \code{\link{GlmBayesMfp}} object.
|
|
| 22 |
##' |
|
| 23 |
##' |
|
| 24 |
##' @method [ GlmBayesMfp |
|
| 25 |
##' @param x valid \code{\link{GlmBayesMfp}} object
|
|
| 26 |
##' @param \dots transports the indexes of the models |
|
| 27 |
##' @return The subsetted object. |
|
| 28 |
##' @export |
|
| 29 |
##' |
|
| 30 |
##' @name Extract.GlmBayesMfp |
|
| 31 |
##' @aliases Extract.GlmBayesMfp [.GlmBayesMfp |
|
| 32 |
##' @keywords methods |
|
| 33 |
##' @seealso \code{\link{glmBayesMfp}}
|
|
| 34 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 35 |
"[.GlmBayesMfp" <- function(x, ...) {
|
|
| 36 | 83x |
y <- NextMethod("[")
|
| 37 | 83x |
mostattributes(y) <- attributes(x) |
| 38 | 83x |
names(y) <- names(x)[...] |
| 39 | 83x |
class(y) <- oldClass(x) |
| 40 | 83x |
y |
| 41 |
} |
|
| 42 | ||
| 43 | ||
| 44 | ||
| 45 |
##' Print a GlmBayesMfp object. |
|
| 46 |
##' |
|
| 47 |
##' |
|
| 48 |
##' @method print GlmBayesMfp |
|
| 49 |
##' @param x valid \code{\link{GlmBayesMfp}} object
|
|
| 50 |
##' @param \dots unused |
|
| 51 |
##' @return Only used for its side effect |
|
| 52 |
##' |
|
| 53 |
##' @export |
|
| 54 |
##' |
|
| 55 |
##' @keywords methods |
|
| 56 |
##' @seealso \code{\link{glmBayesMfp}}
|
|
| 57 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 58 |
print.GlmBayesMfp <- function(x, ...) {
|
|
| 59 | ! |
cat("------------------------------ GlmBayesMfp-Output ------------------------------\n")
|
| 60 | ! |
cat( |
| 61 | ! |
length(x), "multivariable fractional polynomial model(s) of total (visited/cached)", |
| 62 | ! |
attr(x, "numVisited"), "for following covariates:\n\n" |
| 63 |
) |
|
| 64 | ! |
cat("fixed: ", paste(attr(x, "termNames")$fixed, collapse = ", "), "\n")
|
| 65 | ! |
cat("uncertain fixed form: ", paste(attr(x, "termNames")$uc, collapse = ", "), "\n")
|
| 66 | ! |
cat("fractional polynomial:", paste(attr(x, "termNames")$bfp, collapse = ", "), "\n")
|
| 67 | ! |
pr <- attr(x, "priorSpecs") |
| 68 | ! |
cat( |
| 69 | ! |
"\nPrior for g was,", class(pr$gPrior), |
| 70 | ! |
"\nand a ", pr$modelPrior, " prior on the model space was used.", |
| 71 | ! |
"\n" |
| 72 |
) |
|
| 73 |
} |
|
| 74 | ||
| 75 | ||
| 76 |
##' Convert a GlmBayesMfp object into a data frame |
|
| 77 |
##' |
|
| 78 |
##' |
|
| 79 |
##' @method as.data.frame GlmBayesMfp |
|
| 80 |
##' @param x valid \code{\link{GlmBayesMfp}} object
|
|
| 81 |
##' @param row.names optional rownames (default is to keep the names of the |
|
| 82 |
##' \code{\link{GlmBayesMfp}} list)
|
|
| 83 |
##' @param \dots unused |
|
| 84 |
##' @param freq should empirical frequencies of the models in the sampling |
|
| 85 |
##' path be given? (not default) |
|
| 86 |
##' @return The data frame with the following columns: |
|
| 87 |
##' \describe{
|
|
| 88 |
##' \item{posterior}{the posterior model probabilities}
|
|
| 89 |
##' \item{logMargLik}{the log marginal likelihood of the models}
|
|
| 90 |
##' \item{logPrior}{the log prior probabilities of the models}
|
|
| 91 |
##' } |
|
| 92 |
##' Additionally, for each uncertain fixed form covariates a column with the inclusion |
|
| 93 |
##' status, and for each fractional polynomial a column with the powers are returned. |
|
| 94 |
##' @seealso \code{\link{glmBayesMfp}}
|
|
| 95 |
##' @keywords methods |
|
| 96 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 97 |
##' |
|
| 98 |
##' @export |
|
| 99 |
as.data.frame.GlmBayesMfp <- function(x, |
|
| 100 |
row.names = NULL, |
|
| 101 |
..., |
|
| 102 |
freq = FALSE) {
|
|
| 103 |
## posterior probabilities: |
|
| 104 | 16x |
ret <- data.frame( |
| 105 | 16x |
posterior = posteriors(x, type = "normalized") |
| 106 |
) |
|
| 107 | 16x |
if (!is.null(attr(x, "searchConfig")$chainlength) && freq) {
|
| 108 | ! |
ret$frequency <- posteriors(x, type = "sampling") |
| 109 |
} |
|
| 110 | ||
| 111 |
## row names |
|
| 112 | 16x |
row.names(ret) <- |
| 113 | 16x |
if (!is.null(row.names)) {
|
| 114 | ! |
row.names |
| 115 |
} else {
|
|
| 116 | 16x |
names(x) |
| 117 |
} |
|
| 118 | ||
| 119 |
## log marginal likelihood and log prior |
|
| 120 | 16x |
ret$logMargLik <- logMargLiks(x) |
| 121 | 16x |
ret$logPrior <- logPriors(x) |
| 122 | ||
| 123 |
## fixed form covariates: |
|
| 124 | 16x |
for (i in seq_along(fixNames <- attr(x, "termNames")$fixed[-1])) {
|
| 125 | ! |
ret[[fixNames[i]]] <- sapply( |
| 126 | ! |
x, |
| 127 | ! |
function(one) i %in% one$configuration$fixTerms |
| 128 |
) |
|
| 129 |
} |
|
| 130 | ||
| 131 |
## uncertain fixed form covariates: |
|
| 132 | 16x |
for (i in seq_along(ucNames <- attr(x, "termNames")$uc)) {
|
| 133 | 72x |
ret[[ucNames[i]]] <- sapply( |
| 134 | 72x |
x, |
| 135 | 72x |
function(one) i %in% one$configuration$ucTerms |
| 136 |
) |
|
| 137 |
} |
|
| 138 |
## fractional polynomial: |
|
| 139 | 16x |
for (fpName in attr(x, "termNames")$bfp) {
|
| 140 | 22x |
ret[[fpName]] <- sapply( |
| 141 | 22x |
x, |
| 142 | 22x |
function(one) paste(one$configuration$powers[[fpName]], collapse = ", ") |
| 143 |
) |
|
| 144 |
} |
|
| 145 | ||
| 146 | 16x |
ret |
| 147 |
} |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[sampleBma.R] by DSB Die 04/12/2012 09:29 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Produce posterior samples from the Bayesian model average over models |
|
| 9 |
## returned by glmBayesMfp, using MCMC. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 21/05/2010 file creation. |
|
| 13 |
## 24/05/2010 first implementation (without testing) |
|
| 14 |
## 25/05/2010 Do not return modelFreqs; |
|
| 15 |
## return a list with element "samples" from the S4 class |
|
| 16 |
## "GlmBayesMfpSamples". |
|
| 17 |
## test the first implementation. |
|
| 18 |
## 28/05/2010 rename "response" to "fitted", which shall contain the |
|
| 19 |
## *linear predictors* for the fitted data (and not the *means*). |
|
| 20 |
## 26/07/2010 insert many drop=FALSE statements in matrix subsetting |
|
| 21 |
## 03/08/2010 there are always z samples (even in the null model), so we do |
|
| 22 |
## need to catch special cases. |
|
| 23 |
## 26/11/2012 modifications to accommodate the TBF methodology |
|
| 24 |
## 04/12/2012 modifications to accommodate the Cox models |
|
| 25 |
##################################################################################### |
|
| 26 | ||
| 27 |
##' @include GlmBayesMfp-methods.R |
|
| 28 |
##' @include helpers.R |
|
| 29 |
##' @include McmcOptions-class.R |
|
| 30 |
##' @include McmcOptions-methods.R |
|
| 31 |
##' @include sampleGlm.R |
|
| 32 |
NULL |
|
| 33 | ||
| 34 |
##' Produce posterior samples from a Bayesian model average over GLMs / Cox |
|
| 35 |
##' models |
|
| 36 |
##' |
|
| 37 |
##' Based on the result list from \code{\link{glmBayesMfp}}, sample from the
|
|
| 38 |
##' Bayesian model average (BMA) over the models contained in this list. |
|
| 39 |
##' |
|
| 40 |
##' If TBF methodology is used (which is specified within the \code{glmBayesMfp}
|
|
| 41 |
##' object), then Monte Carlo (MC) sampling is used. If the fully Bayesian, |
|
| 42 |
##' generalized hyper-g prior methodology is used, then the sampling is based on |
|
| 43 |
##' MCMC. Therefore, instead of only specifying the required number of samples |
|
| 44 |
##' and the model probabilities, one also needs to specify the burn-in length |
|
| 45 |
##' and the thinning parameter, which will be applied to every model from which |
|
| 46 |
##' at least one sample is included in the average. Alternatively, you can ask |
|
| 47 |
##' for MCMC marginal likelihood estimates for all models in the list. Then at |
|
| 48 |
##' least \code{nMargLikSamples} will be produced for each model, whether
|
|
| 49 |
##' included in the BMA sample or not. |
|
| 50 |
##' |
|
| 51 |
##' @param object valid \code{GlmBayesMfp} object containing the models over
|
|
| 52 |
##' which to average |
|
| 53 |
##' @param mcmc MCMC options object with class \code{\linkS4class{McmcOptions}},
|
|
| 54 |
##' specifying the number of required BMA samples (via \code{sampleSize(mcmc)}),
|
|
| 55 |
##' and the burn-in and thinning parameters applied to each model (see above). |
|
| 56 |
##' If TBF is used, each sample is accepted, and the number of samples is given |
|
| 57 |
##' by \code{\link{sampleSize}}(\code{mcmc}).
|
|
| 58 |
##' @param postProbs vector of posterior probabilities (will be normalized within |
|
| 59 |
##' the function) for the weighting of the models in \code{object} (defaults to
|
|
| 60 |
##' the normalized posterior probabilities) |
|
| 61 |
##' @param nMargLikSamples If this is non-\code{NULL}, it specified the number
|
|
| 62 |
##' of samples used for the marginal likelihood estimate for each model (see |
|
| 63 |
##' above). |
|
| 64 |
##' @param verbose should information on computation progress be given? |
|
| 65 |
##' (default) |
|
| 66 |
##' @param \dots optional further arguments already available for sampling from |
|
| 67 |
##' a single model: \code{gridList}, \code{gridSize}, \code{newdata},
|
|
| 68 |
##' \code{weights}, \code{marginalZApprox}, \code{debug}, \code{useOpenMP}.
|
|
| 69 |
##' See \code{\link{sampleGlm}} for the meanings.
|
|
| 70 |
##' |
|
| 71 |
##' @return The result is a list with the following elements: |
|
| 72 |
##' \describe{
|
|
| 73 |
##' \item{modelData}{data frame containing the result from the
|
|
| 74 |
##' \code{as.data.frame} function, and in addition BMA probabilities,
|
|
| 75 |
##' BMA frequencies in the sample, acceptance ratios of the MCMC |
|
| 76 |
##' runs and optionally marginal likelihood estimates / standard |
|
| 77 |
##' errors.} |
|
| 78 |
##' \item{samples}{an object of S4 class \code{\linkS4class{GlmBayesMfpSamples}}
|
|
| 79 |
##' containing the samples from the BMA.} |
|
| 80 |
##' } |
|
| 81 |
##' |
|
| 82 |
##' @keywords models regression |
|
| 83 |
##' @export |
|
| 84 |
sampleBma <-
  function(object,
           mcmc = McmcOptions(),
           postProbs = posteriors(object),
           nMargLikSamples = NULL,
           verbose = TRUE,
           ...) {
    ## Produce Bayesian model averaging (BMA) samples for a "GlmBayesMfp"
    ## model list: model indices are drawn in proportion to their
    ## (normalized) posterior probabilities, each selected model is then
    ## sampled via sampleGlm(), and the per-model samples are pooled.

    ## Checks
    ## **************************************************

    ## check the object
    if (!inherits(object, "GlmBayesMfp")) {
      stop(simpleError("object must be of class GlmBayesMfp"))
    }

    nModels <- length(object)
    if (!(nModels >= 1L)) {
      stop(simpleError("there has to be at least one model in the object"))
    }

    ## get attributes
    attrs <- attributes(object)

    ## check whether tbf (test-based Bayes factors) is used
    tbf <- attrs$distribution$tbf

    ## is this a GLM or a Cox model?
    ## NOTE(review): doGlm is read here but not referenced again inside
    ## this function body.
    doGlm <- attrs$distribution$doGlm

    ## shall we do marginal likelihood estimations?
    ## Only possible without tbf, and only when the user requested it
    ## via nMargLikSamples.
    estimateMargLik <- (!tbf) && (!is.null(nMargLikSamples))
    if (estimateMargLik) {
      ## then check if everything is OK:
      ## require a reasonably large number of samples for the estimate
      stopifnot(
        is.numeric(nMargLikSamples),
        nMargLikSamples > 100
      )
      nMargLikSamples <- as.integer(nMargLikSamples)
    }

    ## other checks:
    ## postProbs must supply exactly one non-negative weight per model
    stopifnot(
      is.bool(verbose),
      is(mcmc, "McmcOptions"),
      identical(
        nModels,
        length(postProbs)
      ),
      postProbs >= 0
    )

    ## correct MCMC option:
    ## with tbf the per-model sampler needs neither burn-in nor thinning,
    ## so keep only the requested number of saved samples
    if (tbf) {
      mcmc <- McmcOptions(
        burnin = 0L,
        step = 1L,
        samples = sampleSize(mcmc)
      )
    }

    ## Start samples containers
    ## **************************************************

    ## "matrices": one column will be appended per kept sample
    fitted <- matrix(
      nrow = attrs$data$nObs,
      ncol = 0L
    )

    ## we cannot give here the right number of rows,
    ## so use NULL to which we can cbind anything
    predictions <- NULL

    ## vectors
    z <- numeric()

    ## lists, indexed by term name
    bfpCurves <-
      fixCoefs <-
      ucCoefs <- list()

    ## Distribute samples to models
    ## **************************************************

    ## determine the sample size
    nSamples <- sampleSize(mcmc)

    ## determine the model names and the normalized weights
    objNames <- as.numeric(names(object))
    postProbs <- postProbs / sum(postProbs)

    ## be sure that at least one model has positive weight
    stopifnot(any(postProbs > 0))

    ## draw model names
    modelFreqs <-
      if (identical(length(objNames), 1L)) {
        ## if there is only one model, all draws come from it
        ## (this also avoids sample()'s convenience behavior of drawing
        ## from 1:x when x is a single number)
        rep(objNames, nSamples)
      } else {
        ## else more than one model: multinomial draw with BMA weights
        sample(
          objNames,
          size = nSamples,
          replace = TRUE,
          prob = postProbs
        )
      }
    modelFreqs <- table(modelFreqs)
    nams <- names(modelFreqs)

    ## save model summary
    modelData <- as.data.frame(object)
    modelData[, c("bmaProb", "bmaFreq")] <- 0
    modelData[, "bmaProb"] <- postProbs
    modelData[nams, "bmaFreq"] <- modelFreqs / nSamples

    ## prepare for acceptance rates
    modelData[, "acceptanceRatio"] <- 0

    ## prepare for marginal likelihood estimates
    if (estimateMargLik) {
      modelData[, c("margLikEstimate", "margLikError")] <- 0
    }

    ## Start sampling
    ## **************************************************

    ## echo sampling start
    if (verbose) {
      cat("\nStarting sampling ...")
    }

    ## now sample from each model as often as indicated by the modelFreqs
    ## and save samples from it

    ## process every model in object
    for (j in seq_along(object)) {
      if (verbose) {
        cat("\nNow at model ", j, "...")
      }

      ## get this model
      ## (single-bracket indexing keeps the name and object attributes)
      thisModel <- object[j]
      modName <- names(thisModel)

      ## determine if this model's samples are in BMA samples
      inBma <- modName %in% nams

      ## determine the number of samples we need from this model
      thisSampleSize <-
        if (inBma) {
          ## if it is in the BMA, be sure that we have at least
          ## nMargLikSamples samples (if this is NULL, it counts as 0)
          max(
            modelFreqs[modName],
            nMargLikSamples
          )
        } else {
          ## else we only sample if we want marginal likelihood estimates
          max(
            nMargLikSamples,
            0L
          )
        }

      ## decide if we need samples from this model
      if (thisSampleSize > 0L) {
        ## Sample from this model
        ## **************************************************

        ## adapt Mcmc object to reflect the correct number of samples
        thisMcmc <- McmcOptions(
          samples = thisSampleSize,
          burnin = mcmc@burnin,
          step = mcmc@step
        )

        ## and run the sampleGlm function
        thisOut <- sampleGlm(
          object = thisModel,
          mcmc = thisMcmc,
          estimateMargLik = estimateMargLik,
          verbose = verbose, # pass the verbose option from this function
          ... # and be sure to pass all other options
        )

        ## Save results
        ## **************************************************

        ## which samples do we keep from this model?
        keepSamples <-
          if (inBma) {
            ## the last modelFreqs[modName] samples
            seq(
              from = thisSampleSize - modelFreqs[modName] + 1L,
              to = thisSampleSize
            )
          } else {
            ## none: this model was only sampled for the marginal
            ## likelihood estimate
            integer()
          }

        ## save acceptance ratio
        modelData[modName, "acceptanceRatio"] <-
          thisOut$acceptanceRatio

        ## save the marginal likelihood estimates, if required
        if (estimateMargLik) {
          modelData[modName, c("margLikEstimate", "margLikError")] <-
            unlist(thisOut$logMargLik[c("estimate", "standardError")])
        }

        ## save the predictive samples, if there are any
        if (nrow(thisOut$samples@predictions) > 0L) {
          predictions <- cbind(
            predictions,
            thisOut$samples@predictions[, keepSamples, drop = FALSE]
          )
        }

        ## save z samples
        z <- c(
          z,
          thisOut$samples@z[keepSamples]
        )

        ## save all fixed coefs samples
        for (fixName in names(thisOut$samples@fixCoefs)) {
          ## get these samples
          thisFixSamples <- thisOut$samples@fixCoefs[[fixName]]

          ## append these samples
          fixCoefs[[fixName]] <-
            cbind(
              fixCoefs[[fixName]],
              thisFixSamples[, keepSamples, drop = FALSE]
            )
        }

        ## save the fit samples on the linear predictor scale
        fitted <- cbind(
          fitted,
          thisOut$samples@fitted[, keepSamples, drop = FALSE]
        )

        ## save all bfp curve samples
        for (bfpName in names(thisOut$samples@bfpCurves)) {
          ## get these samples
          thisBfpSamples <- thisOut$samples@bfpCurves[[bfpName]]

          ## append these samples
          bfpCurves[[bfpName]] <-
            cbind(
              bfpCurves[[bfpName]],
              thisBfpSamples[, keepSamples, drop = FALSE]
            )

          ## copy the grid attributes, which cbind does not preserve
          ## (actually we would not need to copy them every time)
          attr(bfpCurves[[bfpName]], "scaledGrid") <-
            attr(thisBfpSamples, "scaledGrid")
          attr(bfpCurves[[bfpName]], "whereObsVals") <-
            attr(thisBfpSamples, "whereObsVals")
        }

        ## save all uc coefs samples
        for (ucName in names(thisOut$samples@ucCoefs)) {
          ## get these samples
          thisUcSamples <- thisOut$samples@ucCoefs[[ucName]]

          ## append these samples
          ucCoefs[[ucName]] <-
            cbind(
              ucCoefs[[ucName]],
              thisUcSamples[, keepSamples, drop = FALSE]
            )
        }
      }
    }

    ## Collect things in return list
    ## **************************************************

    ## be sure that predictions is a matrix, even if we have no
    ## predictive samples
    predictions <-
      if (is.null(predictions)) {
        matrix(nrow = 0, ncol = 0)
      } else {
        predictions
      }

    ret <- list(
      modelData = modelData,
      samples =
        new(
          "GlmBayesMfpSamples",
          fitted = fitted,
          predictions = predictions,
          fixCoefs = fixCoefs,
          z = z,
          bfpCurves = bfpCurves,
          ucCoefs = ucCoefs,
          shiftScaleMax = attrs$shiftScaleMax,
          nSamples = nSamples
        )
    )

    ## finally return the whole stuff.
    return(ret)
  }
#####################################################################################
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch]
## Project: Bayesian FPs for GLMs
##
## Description:
## Friendlier interface to fit Cox models with glmBayesMfp
##
## History:
## 14/07/2015 Copy from CoxTBFs project
#####################################################################################

##' @include helpers.R
##' @include getDesignMatrix.R
NULL
##' Fit Cox models using glmBayesMfp
##'
##' A simplified formula based interface to \code{\link{glmBayesMfp}} to fit Cox models. Can return
##' the Maximum a posteriori (MAP) model, Median probability model (MPM) or Bayesian model average
##' (BMA). Provides global empirical Bayes and AIC/BIC based model inference.
##'
##' @param formula model formula with Surv object as LHS and \code{\link{uc}} or \code{\link{bfp}}
##' variables as RHS.
##' @param data data.frame for model variables
##' @param type type of model to fit, one of "MAP", "MPM", "BMA", "BMAfull"
##' @param baseline how to calculate the baseline hazard function. "cox" uses unshrunken
##' coefficients. "shrunk" refits the baseline with shrunken coefficients (default).
##' @param globalEB use global empirical Bayes estimate of g (default: FALSE)
##' @param IC use information criteria based model selection (default: FALSE). Either "AIC" or "BIC".
##' @param sep estimate the baseline hazard for each estimate of the model coefficients
##' (default: FALSE).
##' @param keepModelList keep the model list returned by glmBayesMfp for MAP and MPM models
##' (default: FALSE).
##' @param ... additional arguments to pass to \code{\link{glmBayesMfp}}
##' @param overrideConfig replaces the MAP model with the given configuration, which is passed
##' to \code{\link{computeModels}}
##'
##' @return An object of S3 class \code{TBFcox}, or \code{TBFcox.sep} if sep=TRUE.
##'
##' @keywords models regression
##'
##' @import stats
##' @export
coxTBF <- function(formula, |
|
| 45 |
data, |
|
| 46 |
type = "MAP", |
|
| 47 |
baseline = "shrunk", |
|
| 48 |
globalEB = FALSE, |
|
| 49 |
IC = FALSE, |
|
| 50 |
sep = FALSE, |
|
| 51 |
keepModelList = FALSE, |
|
| 52 |
..., |
|
| 53 |
overrideConfig) {
|
|
| 54 | 10x |
formula <- as.formula(formula) |
| 55 | ||
| 56 | 10x |
LHS <- formula[[2]][[2]] |
| 57 | 10x |
RHS <- paste(attr(terms(formula), "term.labels"), collapse = " + ") |
| 58 | 10x |
selection.formula <- formula(paste(LHS, "~", RHS)) |
| 59 | ||
| 60 | 10x |
tryCatch( |
| 61 | 10x |
type <- match.arg(type, c("MAP", "MPM", "BMA", "BMAfull")),
|
| 62 | 10x |
error = function(e) stop(paste("Invalid value for type: ", type))
|
| 63 |
) |
|
| 64 | 10x |
tryCatch( |
| 65 | 10x |
baseline <- match.arg(baseline, c("cox", "shrunk")),
|
| 66 | 10x |
error = function(e) stop("Invalid baseline specification")
|
| 67 |
) |
|
| 68 | ||
| 69 | 10x |
time.var <- as.character(formula[[2]][[2]]) |
| 70 | 10x |
status.var <- as.character(formula[[2]][[3]]) |
| 71 | ||
| 72 |
# Set up the g-prior |
|
| 73 | 10x |
nEvents <- sum(data[[status.var]]) |
| 74 | ||
| 75 | 10x |
prior.hypergn <- CustomGPrior(logDens = function(g) {
|
| 76 | 160632x |
return(-log(nEvents) - 2 * log1p(g / nEvents)) |
| 77 |
}) |
|
| 78 | ||
| 79 |
############################################################################################# |
|
| 80 |
# Handle Global Empirical Bayes |
|
| 81 |
# First we need to fit the model, then we can hand a fixed g to the normal search procedure |
|
| 82 | 10x |
if (globalEB == TRUE) {
|
| 83 | 1x |
full.model.space <- 2^length(labels(terms(selection.formula))) |
| 84 | ||
| 85 | 1x |
gEB.models <- glmBayesMfp(selection.formula, |
| 86 | 1x |
censInd = data[[status.var]], |
| 87 | 1x |
data = data, |
| 88 | 1x |
tbf = TRUE, |
| 89 | 1x |
empiricalBayes = TRUE, |
| 90 | 1x |
priorSpecs = list(gPrior = prior.hypergn, modelPrior = "dependent"), |
| 91 | 1x |
nModels = full.model.space, |
| 92 | 1x |
method = "exhaustive", |
| 93 | 1x |
verbose = FALSE |
| 94 |
) |
|
| 95 | ||
| 96 | 1x |
k <- length(gEB.models) |
| 97 | 1x |
deviances <- sapply(1:k, function(i) gEB.models[[i]]$information$residualDeviance) |
| 98 | 1x |
log.prior.prob <- sapply(1:k, function(i) gEB.models[[i]]$information$logPrior) |
| 99 | ||
| 100 | 1x |
ucList <- attr(gEB.models, "indices")$ucList |
| 101 | 1x |
degrees <- sapply(1:k, function(i) {
|
| 102 | 64x |
length(unlist(ucList[gEB.models[[i]]$configuration$ucTerms])) |
| 103 |
}) |
|
| 104 | 1x |
valid <- is.na(deviances) != TRUE |
| 105 | ||
| 106 | 1x |
TBF <- function(g) sum(((g + 1)^(-degrees / 2) * exp(g / (g + 1) * deviances / 2) * exp(log.prior.prob))[valid]) |
| 107 | ||
| 108 | 1x |
globalEB.fun <- function(g) sapply(g, TBF) |
| 109 | ||
| 110 | 1x |
bestg <- optimise(f = globalEB.fun, interval = c(0.5, 1000), maximum = TRUE)$maximum |
| 111 | 1x |
print(paste("Global EB chooses g =", bestg))
|
| 112 |
} |
|
| 113 |
# end globalEB |
|
| 114 |
############################################################################################# |
|
| 115 | ||
| 116 | 10x |
args1 <- list(selection.formula, |
| 117 | 10x |
censInd = data[[status.var]], |
| 118 | 10x |
data = data, |
| 119 | 10x |
tbf = TRUE, |
| 120 | 10x |
priorSpecs = list(gPrior = prior.hypergn, modelPrior = "dependent"), |
| 121 | 10x |
chainlength = 5000, |
| 122 | 10x |
method = "sampling", |
| 123 | 10x |
verbose = FALSE |
| 124 |
) |
|
| 125 | ||
| 126 | 10x |
inargs <- list(...) |
| 127 | ||
| 128 | 2x |
if (IC != FALSE) inargs <- c(inargs, fixedg = 10 * nrow(data)) |
| 129 | ||
| 130 | 10x |
if (globalEB && exists("bestg")) {
|
| 131 | 1x |
inargs <- c(inargs, fixedg = bestg) |
| 132 |
} |
|
| 133 | ||
| 134 |
# match args and ... |
|
| 135 | 10x |
if (length(inargs)) {
|
| 136 | 6x |
for (i in seq_along(inargs)) {
|
| 137 | 6x |
args1[[names(inargs)[i]]] <- inargs[[i]] |
| 138 |
} |
|
| 139 |
} |
|
| 140 | ||
| 141 |
# fit the model using TBF method |
|
| 142 | 10x |
models <- do.call(glmBayesMfp, args1) |
| 143 | ||
| 144 | 10x |
this.call <- match.call() |
| 145 | ||
| 146 | 10x |
if (globalEB) {
|
| 147 | 1x |
geb <- which(names(this.call) == "globalEB") |
| 148 | 1x |
names(this.call)[geb] <- "fixedg" |
| 149 | 1x |
this.call[geb] <- bestg |
| 150 |
} |
|
| 151 | ||
| 152 |
# If we are doing AIC/BIC we need to fit models, calculate IC values and new probabilities. |
|
| 153 | 10x |
if (IC != FALSE && type != "BMAfull") {
|
| 154 | 1x |
IC.values <- numeric(length(models)) |
| 155 | 1x |
nullModel <- NA |
| 156 | ||
| 157 | ||
| 158 | 1x |
design.names <- colnames(attr(models, "data")$xCentered) |
| 159 | ||
| 160 | 1x |
new.data <- data.frame(cbind( |
| 161 | 1x |
status.var = attr(models, "data")$censInd, |
| 162 | 1x |
time.var = attr(models, "data")$y, |
| 163 | 1x |
attr(models, "data")$xCentered[, -1] |
| 164 |
)) |
|
| 165 | ||
| 166 | 1x |
colnames(new.data)[1:2] <- c(status.var, time.var) |
| 167 | ||
| 168 | ||
| 169 | ||
| 170 | 1x |
for (j in seq_along(models)) {
|
| 171 |
# new.design.matrix <- getDesignMatrix(object=models[j])[,-1,drop=FALSE] |
|
| 172 |
# |
|
| 173 |
# new.data <- data.frame(cbind(status.var=attributes(models[j])$data$censInd, |
|
| 174 |
# time.var=attributes(models[j])$data$y, |
|
| 175 |
# new.design.matrix)) |
|
| 176 |
# |
|
| 177 |
# colnames(new.data)[1:2] <- c(status.var,time.var) |
|
| 178 | ||
| 179 |
# covariate indices corresponding to data frame |
|
| 180 | 1x |
ind <- unlist(attr(models, "indices")$ucList[models[[j]]$configuration$ucTerms]) |
| 181 | ||
| 182 | ||
| 183 | 1x |
model.formula <- reformulate( |
| 184 | 1x |
setdiff(colnames(new.data), c(time.var, status.var)), |
| 185 | 1x |
response = paste("survival::Surv(", time.var, ",", status.var, ")")
|
| 186 |
) |
|
| 187 | ||
| 188 | 1x |
model.cph <- rms::cph(formula(model.formula), data = new.data, surv = FALSE, se.fit = FALSE, y = FALSE, x = FALSE) |
| 189 | ||
| 190 |
# if(j%%10==0) print(j) |
|
| 191 | ||
| 192 | ||
| 193 | ! |
if (IC == "AIC") IC.values[j] <- -AIC(model.cph) / 2 |
| 194 | ||
| 195 | 1x |
if (IC == "BIC") IC.values[j] <- -AIC(model.cph, k = log(sum(new.data[status.var]))) / 2 |
| 196 | ||
| 197 |
# save this model so we can delete it |
|
| 198 | 1x |
if (is.na(IC.values[j]) || length(models[[j]]$configuration$ucTerms) == 0) {
|
| 199 | ! |
nullModel <- j |
| 200 |
} |
|
| 201 |
} |
|
| 202 | ||
| 203 |
# delete null model since it is can't be used later |
|
| 204 | 1x |
if (!is.na(nullModel)) {
|
| 205 | ! |
IC.values <- IC.values[-nullModel] |
| 206 | ! |
models <- models[-nullModel] |
| 207 |
} |
|
| 208 | 1x |
models.df <- as.data.frame(models) |
| 209 | ||
| 210 | ||
| 211 | 1x |
IC.posteriors <- exp((IC.values - max(IC.values)) + models.df$logPrior) |
| 212 | 1x |
IC.posteriors <- IC.posteriors / sum(IC.posteriors) |
| 213 | ||
| 214 | 1x |
if (type == "MPM") {
|
| 215 | ! |
attr(models, "inclusionProbs") <- apply(models.df[, -(1:3)], 2, function(x) sum(x * IC.posteriors)) |
| 216 |
} |
|
| 217 | ||
| 218 | 1x |
if (type == "MAP") {
|
| 219 | 1x |
models[1] <- models[which.max(IC.posteriors)] |
| 220 |
# also include inclusion probs for *IC methods |
|
| 221 | 1x |
attr(models, "inclusionProbs") <- apply(models.df[, -(1:3)], 2, function(x) sum(x * IC.posteriors)) |
| 222 |
} |
|
| 223 |
} |
|
| 224 |
# End AIC/BIC |
|
| 225 | ||
| 226 | ||
| 227 | ||
| 228 | ||
| 229 | ||
| 230 | 10x |
if (type == "MAP") {
|
| 231 | 7x |
model.listpart <- models[1] |
| 232 | ||
| 233 |
# if overrideConfig exists, we use it instead of MPM to fit the desired model. |
|
| 234 | 7x |
if (!missing(overrideConfig)) {
|
| 235 | ! |
print("Using overrideConfig model")
|
| 236 | ||
| 237 | ! |
new.models <- computeModels(list(overrideConfig), models) |
| 238 | ! |
model.listpart <- new.models[1] |
| 239 |
} |
|
| 240 | 3x |
} else if (type == "MPM") {
|
| 241 |
# what are the covariates with inclusion probability >0.5? |
|
| 242 | 1x |
mpm.vars <- attr(models, "inclusionProbs") > 0.5 |
| 243 |
# which model includes only these? |
|
| 244 | 1x |
model.df <- as.data.frame(models)[, -c(1:3)] |
| 245 | 1x |
mpm.index <- which(sapply(seq_along(models), function(i) all(model.df[i, ] == mpm.vars))) |
| 246 | 1x |
if (!any(mpm.index)) {
|
| 247 |
# if the mpm wasn't fitted we must construct it |
|
| 248 | ! |
print("MPM model wasn't fitted so we construct it.")
|
| 249 | ! |
mpm.model.config <- models[[1]]$configuration |
| 250 | ! |
mpm.model.config$ucTerms <- which(mpm.vars) |
| 251 | ||
| 252 | ! |
new.models <- computeModels(list(mpm.model.config), models) |
| 253 | ! |
model.listpart <- new.models[1] |
| 254 |
# print("Success.s")
|
|
| 255 |
} else {
|
|
| 256 | 1x |
model.listpart <- models[mpm.index] |
| 257 |
} |
|
| 258 | 2x |
} else if (type == "BMAfull") {
|
| 259 |
# BMA-FULL ---------------------------------------------------------------- |
|
| 260 | 1x |
bma.surv <- list() |
| 261 | 1x |
bma.coefs <- list() |
| 262 | 1x |
bma.survfits <- list() |
| 263 | 1x |
surv.time <- numeric() |
| 264 | ||
| 265 | 1x |
models[which(sapply( |
| 266 | 1x |
seq_along(models), |
| 267 | 1x |
function(i) length(models[i][[1]]$configuration$ucTerms) |
| 268 | 1x |
) == 0)] <- NULL |
| 269 | ||
| 270 |
# if we are doing AIC or BIC we need to calculate (and store) the values separately |
|
| 271 | 1x |
if (IC != FALSE) IC.values <- numeric(length(models)) |
| 272 | ||
| 273 | 1x |
for (j in seq_along(models)) {
|
| 274 | 1x |
new.design.matrix <- getDesignMatrix(object = models[j], intercept = FALSE) |
| 275 | ||
| 276 | 1x |
new.data <- data.frame(cbind( |
| 277 | 1x |
status.var = attributes(models[j])$data$censInd, |
| 278 | 1x |
time.var = attributes(models[j])$data$y, |
| 279 | 1x |
new.design.matrix |
| 280 |
)) |
|
| 281 | ||
| 282 | 1x |
colnames(new.data)[1:2] <- c(status.var, time.var) |
| 283 | ||
| 284 | 1x |
model.formula <- reformulate( |
| 285 | 1x |
setdiff(colnames(new.data), c(time.var, status.var)), |
| 286 | 1x |
response = paste("survival::Surv(", time.var, ",", status.var, ")")
|
| 287 |
) |
|
| 288 | ||
| 289 |
# calculate values for each model |
|
| 290 | 1x |
bma.coefs[[j]] <- getModelCoefs(models[j], |
| 291 | 1x |
mcmc = McmcOptions(burnin = 0L, step = 1L, samples = 100) |
| 292 |
) |
|
| 293 |
# print(colnames(new.data)) |
|
| 294 | 1x |
model.cph <- rms::cph( |
| 295 | 1x |
formula(model.formula), |
| 296 | 1x |
data = new.data, |
| 297 | 1x |
surv = TRUE, |
| 298 | 1x |
se.fit = FALSE, |
| 299 | 1x |
y = TRUE, |
| 300 | 1x |
x = TRUE |
| 301 |
) |
|
| 302 | 1x |
if (j == 1) surv.time <- model.cph$time |
| 303 | ||
| 304 | 1x |
if (baseline == "cox") {
|
| 305 | ! |
bma.surv[[j]] <- rms::Survival(model.cph) |
| 306 | ||
| 307 | ! |
shrunk.survfit <- survival::survfit(model.cph) |
| 308 | ! |
shrunk.surv <- c(1, shrunk.survfit$surv) |
| 309 | ! |
if (length(shrunk.surv) < length(model.cph$surv)) {
|
| 310 | ! |
shrunk.surv <- c(shrunk.surv, shrunk.surv[length(shrunk.surv)]) |
| 311 |
} |
|
| 312 | ! |
bma.survfits[[j]] <- shrunk.surv |
| 313 |
} |
|
| 314 | ||
| 315 | 1x |
if (baseline == "shrunk") {
|
| 316 |
# Take cox model object, put in shrunken coefficients, re-estimate baseline hazard |
|
| 317 | 1x |
shrunk.cph <- model.cph |
| 318 | 1x |
shrunk.mm <- new.design.matrix # model.matrix(model.formula, data=data)[,-1] |
| 319 |
# shrunk.mm <-scale(shrunk.mm) |
|
| 320 | 1x |
shrunk.cph$linear.predictors <- shrunk.mm %*% bma.coefs[[j]] |
| 321 | ||
| 322 | ||
| 323 |
# shrunk.survfit <- survival::survfit(shrunk.cph) |
|
| 324 | ||
| 325 | 1x |
shrunk.survfit <- try(survival::survfit(shrunk.cph)) |
| 326 | ||
| 327 | 1x |
if (class(shrunk.survfit)[1] == "try-error") {
|
| 328 | ! |
this.model <- models[j] |
| 329 | ! |
these.coefs <- bma.coefs[[j]] |
| 330 | ! |
save("model.cph", "shrunk.mm", "these.coefs", "this.model", file = "Faulty.Files.Rdata")
|
| 331 | ||
| 332 | ! |
g <- exp(models[[j]]$information$zMode) |
| 333 | ! |
shrunk.cph$linear.predictors <- shrunk.mm %*% (model.cph$coefficients * g / (g + 1)) |
| 334 | ! |
shrunk.survfit <- try(survival::survfit(shrunk.cph)) |
| 335 |
} |
|
| 336 | ||
| 337 | ||
| 338 | 1x |
shrunk.surv <- c(1, shrunk.survfit$surv) |
| 339 | 1x |
if (length(shrunk.surv) < length(model.cph$surv)) {
|
| 340 | ! |
shrunk.surv <- c(shrunk.surv, shrunk.surv[length(shrunk.surv)]) |
| 341 |
} |
|
| 342 | 1x |
shrunk.cph$surv <- shrunk.surv |
| 343 | ||
| 344 | 1x |
bma.surv[[j]] <- rms::Survival(shrunk.cph) |
| 345 | ||
| 346 | 1x |
bma.survfits[[j]] <- shrunk.surv |
| 347 |
} |
|
| 348 | ||
| 349 | ||
| 350 | 1x |
if (IC != FALSE) {
|
| 351 | ! |
if (IC == "AIC") IC.values[j] <- -AIC(model.cph) / 2 |
| 352 | 1x |
if (IC == "BIC") IC.values[j] <- -AIC(model.cph, k = log(sum(new.data[status.var]))) / 2 |
| 353 |
} |
|
| 354 |
} |
|
| 355 | ||
| 356 |
# unlist gives the factor levels the wrong names, so take the better ones |
|
| 357 |
# from cph to avoid confusion! |
|
| 358 |
# names(model.coefs) <- names(model.cph$coefs) |
|
| 359 | ||
| 360 | 1x |
ret <- list() |
| 361 | ||
| 362 | 1x |
ret$formula <- writeFormula(models[1], time.var, status.var) # model.formula |
| 363 | 1x |
ret$coefs <- bma.coefs |
| 364 | 1x |
ret$data <- data |
| 365 | 1x |
ret$call <- this.call |
| 366 | ||
| 367 | ||
| 368 | 1x |
if (IC == FALSE) {
|
| 369 | ! |
ret$probability <- posteriors(models) |
| 370 | 1x |
} else if (IC != FALSE) {
|
| 371 | 1x |
log.priors <- unlist(lapply(models, function(i) i$information$logPrior)) |
| 372 | 1x |
IC.posteriors <- exp((IC.values - max(IC.values)) + log.priors) |
| 373 | 1x |
ret$probability <- IC.posteriors / sum(IC.posteriors) |
| 374 |
} |
|
| 375 | ||
| 376 | 1x |
ret$survfit <- matrix(unlist(bma.survfits), byrow = TRUE, nrow = length(bma.survfits)) |
| 377 | 1x |
ret$time <- surv.time |
| 378 | 1x |
ret$bma.surv <- bma.surv |
| 379 | 1x |
ret$model.object <- models # just save the first one for space |
| 380 | 1x |
class(ret) <- "TBFcox.BMA" |
| 381 | 1x |
return(ret) |
| 382 | 1x |
} else if (type == "BMA") {
|
| 383 |
# check for null models and remove them. The fitting procedure can't handle them |
|
| 384 |
# this might be a bit dodgy, but no alternative solution comes to mind |
|
| 385 | 1x |
isNull <- apply(as.data.frame(models)[, -(1:3)], 1, sum) == 0 |
| 386 | 1x |
models[isNull] <- NULL |
| 387 | ||
| 388 | 1x |
if (IC != FALSE) {
|
| 389 | ! |
sbma <- sampleBma(models, postProbs = IC.posteriors[!isNull]) |
| 390 |
} else {
|
|
| 391 | 1x |
sbma <- sampleBma(models) |
| 392 |
} |
|
| 393 | ||
| 394 |
# uc.included <- which(unlist(lapply(sbma$samples@ucCoefs, function(x) is.numeric(mean(x))))) |
|
| 395 | 1x |
uc.included <- which(apply(as.data.frame(models)[, -c(1:3)], 2, any)) |
| 396 | ||
| 397 | ||
| 398 | 1x |
fake.model <- models[1] |
| 399 | 1x |
fake.model[[1]]$information <- list() |
| 400 | ||
| 401 | 1x |
uc.num.inc <- which(attr(fake.model, "termNames")$uc %in% names(uc.included)) |
| 402 | ||
| 403 | 1x |
fake.model[[1]]$configuration <- list(powers = list(), ucTerms = uc.num.inc) |
| 404 | ||
| 405 | 1x |
new.design.matrix <- getDesignMatrix(object = fake.model, intercept = FALSE) |
| 406 | ||
| 407 | ||
| 408 |
# get coefficients for BMA |
|
| 409 |
# if(BMA.method=="coefs"){
|
|
| 410 |
# bma.coefs <- unlist(lapply(sbma$samples@ucCoefs, rowSums))/sbma$samples@nSamples |
|
| 411 |
# } |
|
| 412 | ||
| 413 |
# if(BMA.method=="fits"){
|
|
| 414 | 1x |
bma.coefs <- .lm.fit(x = new.design.matrix, y = rowMeans(sbma$samples@fitted))$coefficients |
| 415 |
# } |
|
| 416 | ||
| 417 |
# things were out of order making predictions terrible! |
|
| 418 |
# bma.coefs <- bma.coefs[order(names(bma.coefs))] |
|
| 419 | 1x |
names(bma.coefs) <- colnames(new.design.matrix) |
| 420 | ||
| 421 | 1x |
new.data <- data.frame(cbind( |
| 422 | 1x |
status.var = attributes(fake.model)$data$censInd, |
| 423 | 1x |
time.var = attributes(fake.model)$data$y, |
| 424 | 1x |
new.design.matrix |
| 425 |
)) |
|
| 426 | ||
| 427 | 1x |
colnames(new.data)[1:2] <- c(status.var, time.var) |
| 428 | ||
| 429 | 1x |
model.formula <- reformulate( |
| 430 | 1x |
setdiff(colnames(new.data), c(time.var, status.var)), |
| 431 | 1x |
response = paste("survival::Surv(", time.var, ",", status.var, ")")
|
| 432 |
) |
|
| 433 | ||
| 434 | 1x |
model.cph <- rms::cph( |
| 435 | 1x |
formula(model.formula), |
| 436 | 1x |
data = new.data, |
| 437 | 1x |
surv = TRUE, |
| 438 | 1x |
se.fit = FALSE, |
| 439 | 1x |
y = TRUE, |
| 440 | 1x |
x = TRUE |
| 441 |
) |
|
| 442 | ||
| 443 | 1x |
if (baseline == "cox") {
|
| 444 | ! |
bma.surv <- rms::Survival(model.cph) |
| 445 |
} |
|
| 446 | ||
| 447 | 1x |
if (baseline == "shrunk") {
|
| 448 |
# Take cox model object, put in shrunken coefficients, re-estimate baseline hazard |
|
| 449 | 1x |
shrunk.cph <- model.cph |
| 450 | 1x |
shrunk.mm <- new.design.matrix # model.matrix(model.formula, data=data)[,-1] |
| 451 |
# shrunk.mm <-scale(shrunk.mm) |
|
| 452 | 1x |
shrunk.cph$linear.predictors <- shrunk.mm %*% bma.coefs |
| 453 | 1x |
shrunk.survfit <- survival::survfit(shrunk.cph) |
| 454 | 1x |
shrunk.surv <- c(1, shrunk.survfit$surv) |
| 455 | 1x |
if (length(shrunk.surv) < length(model.cph$surv)) {
|
| 456 | ! |
shrunk.surv <- c(shrunk.surv, shrunk.surv[length(shrunk.surv)]) |
| 457 |
} |
|
| 458 | 1x |
shrunk.cph$surv <- shrunk.surv |
| 459 | ||
| 460 | 1x |
bma.surv <- rms::Survival(shrunk.cph) |
| 461 |
} |
|
| 462 | ||
| 463 |
# unlist gives the factor levels the wrong names, so take the better ones |
|
| 464 |
# from cph to avoid confusion! |
|
| 465 |
# names(model.coefs) <- names(model.cph$coefs) |
|
| 466 | 1x |
ret <- list() |
| 467 | ||
| 468 |
# ret$formula <- writeFormula(models[1], time.var, status.var) #model.formula |
|
| 469 | 1x |
ret$formula <- formula( |
| 470 | 1x |
paste( |
| 471 | 1x |
"survival::Surv(", time.var, ",", status.var, ") ~",
|
| 472 | 1x |
paste(paste(names(sbma$samples@ucCoefs)), collapse = " + ") |
| 473 |
) |
|
| 474 |
) |
|
| 475 | 1x |
ret$coefs <- bma.coefs |
| 476 | 1x |
ret$data <- data |
| 477 | 1x |
ret$call <- this.call |
| 478 | ||
| 479 | ||
| 480 | 1x |
ret$survival <- bma.surv |
| 481 | 1x |
ret$model.object <- fake.model # just save the first one for space |
| 482 | 1x |
class(ret) <- "TBFcox" |
| 483 | 1x |
return(ret) |
| 484 |
} |
|
| 485 |
############################################################################################################# |
|
| 486 | ||
| 487 | 8x |
if (type %in% c("MAP", "MPM")) {
|
| 488 | 6x |
if (!keepModelList) rm(models) |
| 489 | ||
| 490 | 8x |
new.design.matrix <- getDesignMatrix(object = model.listpart, intercept = FALSE) |
| 491 | ||
| 492 | 8x |
new.data <- data.frame(cbind( |
| 493 | 8x |
status.var = attributes(model.listpart)$data$censInd, |
| 494 | 8x |
time.var = attributes(model.listpart)$data$y, |
| 495 | 8x |
new.design.matrix |
| 496 |
)) |
|
| 497 | ||
| 498 | 8x |
colnames(new.data)[1:2] <- c(status.var, time.var) |
| 499 | ||
| 500 | 8x |
model.formula <- reformulate( |
| 501 | 8x |
setdiff(colnames(new.data), c(time.var, status.var)), |
| 502 | 8x |
response = paste("survival::Surv(", time.var, ",", status.var, ")")
|
| 503 |
) |
|
| 504 | ||
| 505 |
# Do the original shortcut way, using E(beta) in the exp. |
|
| 506 | 8x |
if (sep == FALSE) {
|
| 507 | 7x |
model.coefs <- getModelCoefs(model.listpart, sep = FALSE) |
| 508 | 7x |
model.formula <- reformulate( |
| 509 | 7x |
setdiff(colnames(new.data), c(time.var, status.var)), |
| 510 | 7x |
response = paste("survival::Surv(", time.var, ",", status.var, ")")
|
| 511 |
) |
|
| 512 | 7x |
model.cph <- rms::cph( |
| 513 | 7x |
formula(model.formula), |
| 514 | 7x |
data = new.data, |
| 515 | 7x |
surv = TRUE, |
| 516 | 7x |
se.fit = FALSE, |
| 517 | 7x |
y = TRUE, |
| 518 | 7x |
x = TRUE |
| 519 |
) |
|
| 520 | ||
| 521 | 7x |
ret <- list() |
| 522 | ||
| 523 | 7x |
ret$formula <- writeFormula(model.listpart, time.var, status.var) # model.formula |
| 524 | 7x |
ret$coefs <- model.coefs |
| 525 | 7x |
ret$data <- data |
| 526 | 7x |
ret$call <- this.call |
| 527 | 7x |
ret$survival <- function() {}
|
| 528 | 7x |
ret$model.object <- model.listpart |
| 529 | 2x |
if (exists("models")) ret$model.list <- models
|
| 530 | ||
| 531 | 7x |
if (baseline == "shrunk") {
|
| 532 |
# Take cox model object, put in shrunken coefficients, re-estimate baseline hazard |
|
| 533 | 7x |
shrunk.cph <- model.cph |
| 534 | 7x |
shrunk.mm <- new.design.matrix # model.matrix(model.formula, data=data)[,-1] |
| 535 | ||
| 536 | 7x |
shrunk.cph$linear.predictors <- shrunk.mm %*% model.coefs |
| 537 | 7x |
shrunk.survfit <- survival::survfit(shrunk.cph) |
| 538 | 7x |
shrunk.surv <- c(1, shrunk.survfit$surv) |
| 539 | 7x |
if (length(shrunk.surv) < length(model.cph$surv)) {
|
| 540 | ! |
shrunk.surv <- c(shrunk.surv, shrunk.surv[length(shrunk.surv)]) |
| 541 |
} |
|
| 542 | 7x |
shrunk.cph$surv <- shrunk.surv |
| 543 | ||
| 544 | 7x |
ret$survival <- rms::Survival(shrunk.cph) |
| 545 | ! |
} else if (baseline == "cox") {
|
| 546 | ! |
ret$survival <- rms::Survival(model.cph) |
| 547 |
} |
|
| 548 | ||
| 549 | 7x |
class(ret) <- "TBFcox" |
| 550 | 7x |
return(ret) |
| 551 |
} |
|
| 552 | ||
| 553 |
# Do the full way with E(h_i(t)exp(beta)) |
|
| 554 | 1x |
if (sep == TRUE) {
|
| 555 | 1x |
model.coefs <- getModelCoefs(model.listpart, sep = TRUE) |
| 556 | 1x |
model.formula <- reformulate( |
| 557 | 1x |
setdiff(colnames(new.data), c(time.var, status.var)), |
| 558 | 1x |
response = paste("survival::Surv(", time.var, ",", status.var, ")")
|
| 559 |
) |
|
| 560 | 1x |
model.cph <- rms::cph( |
| 561 | 1x |
formula(model.formula), |
| 562 | 1x |
data = new.data, |
| 563 | 1x |
surv = TRUE, |
| 564 | 1x |
se.fit = FALSE, |
| 565 | 1x |
y = TRUE, |
| 566 | 1x |
x = TRUE |
| 567 |
) |
|
| 568 | ||
| 569 | 1x |
ret <- list() |
| 570 | ||
| 571 | 1x |
ret$formula <- writeFormula(model.listpart, time.var, status.var) # model.formula |
| 572 | ||
| 573 | 1x |
ret$data <- data |
| 574 | 1x |
ret$call <- this.call |
| 575 | ||
| 576 | 1x |
ret$model.object <- model.listpart |
| 577 | ! |
if (exists("models")) ret$model.list <- models
|
| 578 | ||
| 579 | 1x |
ret$survival <- vector(mode = "list", length = length(model.coefs)) |
| 580 | 1x |
ret$coefs <- model.coefs |
| 581 | ||
| 582 | ||
| 583 | 1x |
if (baseline == "shrunk") {
|
| 584 |
# Take cox model object, put in shrunken coefficients, re-estimate baseline hazard |
|
| 585 | 1x |
shrunk.cph <- model.cph |
| 586 | 1x |
shrunk.mm <- new.design.matrix # model.matrix(model.formula, data=data)[,-1] |
| 587 | ||
| 588 | 1x |
for (k in seq_along(model.coefs)) {
|
| 589 | 200x |
shrunk.cph$linear.predictors <- shrunk.mm %*% model.coefs[[k]] |
| 590 | 200x |
shrunk.survfit <- survival::survfit(shrunk.cph) |
| 591 | 200x |
shrunk.surv <- c(1, shrunk.survfit$surv) |
| 592 | 200x |
if (length(shrunk.surv) < length(model.cph$surv)) {
|
| 593 | ! |
shrunk.surv <- c(shrunk.surv, shrunk.surv[length(shrunk.surv)]) |
| 594 |
} |
|
| 595 | 200x |
shrunk.cph$surv <- shrunk.surv |
| 596 | ||
| 597 | 200x |
ret$survival[[k]] <- rms::Survival(shrunk.cph) |
| 598 |
} |
|
| 599 | ! |
} else if (baseline == "cox") {
|
| 600 | ! |
for (k in seq_along(model.coefs)) {
|
| 601 | ! |
ret$survival[[k]] <- rms::Survival(model.cph) |
| 602 |
} |
|
| 603 |
} |
|
| 604 | ||
| 605 | 1x |
class(ret) <- "TBFcox.sep" |
| 606 | 1x |
return(ret) |
| 607 |
} |
|
| 608 |
} # end MAP/MPM |
|
| 609 |
} # end function |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[getDesignMatrix.R] by DSB Die 16/02/2010 13:13 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Construct the design matrix for a given model configuration and GlmBayesMfp object. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 06/01/2010 copy from package bfp, slightly modify signature and add Roxygen comments |
|
| 12 |
## 07/07/2015 add center argument if we want the uncentered design matrix. |
|
| 13 |
##################################################################################### |
|
| 14 | ||
| 15 |
##' @include helpers.R |
|
| 16 |
##' @include getFpTransforms.R |
|
| 17 |
NULL |
|
| 18 | ||
| 19 |
##' Construct the design matrix for a given bfp GLM model |
|
| 20 |
##' |
|
| 21 |
##' This is an internal function to construct the (centered) design matrix for a given |
|
| 22 |
##' bfp GLM model. |
|
| 23 |
##' |
|
| 24 |
##' @param modelConfig the model configuration list which must have elements |
|
| 25 |
##' \dQuote{powers}, \dQuote{ucTerms} and \dQuote{fixTerms}. Defaults to the configuration of the first element of \code{object}.
|
|
| 26 |
##' @param object the \code{GlmBayesMfp} object, which is needed because it contains
|
|
| 27 |
##' the covariates matrix and indices vector |
|
| 28 |
##' @param intercept return the intercept column inside the matrix (default) or not? |
|
| 29 |
##' @param center should the data be centered (default) or not? |
|
| 30 |
##' @return The design matrix, where the non-fixed part is columnwise centered (that is, the |
|
| 31 |
##' colmeans are zero). |
|
| 32 |
##' |
|
| 33 |
##' @keywords internal utilities |
|
| 34 |
getDesignMatrix <- function(modelConfig = object[[1]]$configuration,
                            object,
                            intercept = TRUE,
                            center = TRUE) {
  ## checks ('is.bool' is a package-internal helper)
  stopifnot(is.bool(intercept))

  ## extract covariate matrices from the object:
  ## 'x' is the raw (already shifted/scaled) matrix, 'xCentered' its
  ## columnwise centered version.
  data <- attr(object, "data")
  full <- data$x
  fullCentered <- data$xCentered

  ## index vector mapping columns of 'full' to term types (fixed / uc / bfp)
  inds <- attr(object, "indices")

  ## powers / uc terms / fixed terms from the model configuration
  powers <- modelConfig$powers
  ucSet <- modelConfig$ucTerms
  fixSet <- modelConfig$fixTerms

  ## number of fixed covariates; historically only the intercept was fixed
  nFix <- sum(inds$fixed != 0)
  # stopifnot(identical(nFix, 1L))

  ## how many uc columns and fp transform columns are included?
  ucColInds <- inds$uc %in% ucSet
  nUc <- sum(ucColInds)

  fixColInds <- inds$fix %in% fixSet
  ## number of extra fixed columns actually selected by this configuration
  nFixCols <- sum(fixColInds)

  nFp <- length(unlist(powers))

  ## so we know how much space to reserve for the return matrix
  nColumns <-
    if (intercept) {
      1 + nFix + nUc + nFp
    } else {
      nFix + nUc + nFp
    }

  ret <- matrix(
    nrow = nrow(full),
    ncol = nColumns
  )
  retColnames <- character(ncol(ret))

  ## invariant: 'col' columns have been written so far
  col <- 0

  if (intercept) {
    ## the intercept is the first column of the full matrix
    ret[, 1] <- full[, 1, drop = FALSE]
    col <- col + 1
  }

  ## fp part: one block of (possibly centered) transform columns per fp term
  for (i in seq_along(inds$bfp)) {
    pi <- powers[[i]]
    if (len <- length(pi)) { # if there is at least one power
      new <- getFpTransforms(full[, inds$bfp[i], drop = FALSE], pi, center = center)
      newInds <- col + seq_along(pi)

      ret[, newInds] <- new
      retColnames[newInds] <- colnames(new)

      col <- col + len
    }
  }

  ## uc part: copy the (centered) columns directly
  if (length(ucSet)) {
    new <- if (center) {
      fullCentered[, ucColInds, drop = FALSE]
    } else {
      full[, ucColInds, drop = FALSE]
    }
    newInds <- col + seq_len(nUc)

    ret[, newInds] <- new
    retColnames[newInds] <- colnames(new)

    col <- col + nUc
  }

  ## add any other fixed columns
  if (length(fixSet)) {
    new <- if (center) {
      fullCentered[, fixColInds, drop = FALSE]
    } else {
      full[, fixColInds, drop = FALSE]
    }

    newInds <- col + seq_len(nFixCols)
    ret[, newInds] <- new
    retColnames[newInds] <- colnames(new)

    ## bug fix: advance by the number of columns actually written above
    ## (the original advanced by nFix, which can differ from nFixCols)
    col <- col + nFixCols
  }

  ## attach dimnames
  rownames(ret) <- rownames(full)
  colnames(ret) <- retColnames

  ## and then return the design matrix
  return(ret)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Isaac Gravestock [isaac *.* gravestock *a*t* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Description: |
|
| 6 |
## Transform variables in formulas with fractional polynomials |
|
| 7 |
## |
|
| 8 |
## History: |
|
| 9 |
## 14/07/2015 Copy from CoxTBFs project |
|
| 10 |
##################################################################################### |
|
| 11 | ||
| 12 | ||
| 13 |
##' Transform formula variables |
|
| 14 |
##' |
|
| 15 |
##' Simple function to apply the Box Tidwell transformation to a variables in a formula. |
|
| 16 |
##' Variable is first shifted and scaled |
|
| 17 |
##' NewVar = (Var+shift)/scale |
|
| 18 |
##' then transformed and optionally centered. |
|
| 19 |
##' Can be used in formulas as poly() is. |
|
| 20 |
##' |
|
| 21 |
##' @param var the variable to transform |
|
| 22 |
##' @param powers one or more powers |
|
| 23 |
##' @param scale value to scale the variable after shifting (default=1) |
|
| 24 |
##' @param shift value to shift the variable by (default=0) |
|
| 25 |
##' @param center center the variable when transforming. |
|
| 26 |
##' |
|
| 27 |
##' @return the transformed vector |
|
| 28 |
##' @keywords utilities |
|
| 29 |
##' @export |
|
| 30 |
##' |
|
| 31 | ||
| 32 |
fpTrans <- function(var, powers = 1, scale = 1, shift = 0, center = TRUE) {
  ## Capture the variable's name before it is evaluated, so the generated
  ## column names reflect the expression the caller wrote in the formula.
  label <- deparse(substitute(var))

  ## Dividing by zero would produce a useless all-Inf/NaN column.
  if (scale == 0) stop("Attempting to scale (divide) by 0 in fpTrans")

  x <- as.matrix(var)

  ## Build the display name that records the shift/scale preprocessing,
  ## e.g. "(age+1)/10".
  shown <- label
  if (shift != 0) {
    shown <- paste0("(", label, "+", shift, ")")
  }
  if (scale != 1) {
    shown <- paste0(shown, "/", scale)
  }
  colnames(x) <- shown

  ## Apply the shift/scale preprocessing: NewVar = (Var + shift) / scale.
  x <- (x + shift) / scale

  ## Delegate the actual FP transformation (and optional centering).
  getFpTransforms(vec = x, powers = powers, center = center)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Isaac Gravestock [isaac *.* gravestock *a*t* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Description: |
|
| 6 |
## Estimate shrunken coefficients from GlmBayesMfp object for Cox model |
|
| 7 |
## |
|
| 8 |
## History: |
|
| 9 |
## 14/07/2015 Copied from CoxTBFs project functions-funsel-2.bma.R |
|
| 10 |
##################################################################################### |
|
| 11 | ||
| 12 |
##' @include sampleBma.R |
|
| 13 |
##' @include getDesignMatrix.R |
|
| 14 |
NULL |
|
| 15 | ||
| 16 |
##' Estimate shrunken coefficients from GlmBayesMfp object for Cox model |
|
| 17 |
##' |
|
| 18 |
##' This is an internal function to estimate shrunken coefficients from GlmBayesMfp object |
|
| 19 |
##' for Cox models. It calls \code{sampleBma} and then calculates coefficients based
|
|
| 20 |
##' on a linear fit. |
|
| 21 |
##' |
|
| 22 |
##' @param model.listpart the glmBfp object for which to construct the survival formula |
|
| 23 |
##' @param ... additional arguments to pass to \code{\link{sampleBma}}
|
|
| 24 |
##' @param sep should coefficients be returned separately (default=FALSE) |
|
| 25 |
##' @return A named vector of coefficients. |
|
| 26 |
##' |
|
| 27 |
##' @keywords internal utilities |
|
| 28 | ||
| 29 |
getModelCoefs <- function(model.listpart, mcmc, sep = FALSE, ...) {
  ## Design matrix (without intercept) of the supplied model.
  DM <- getDesignMatrix(object = model.listpart, intercept = FALSE)
  n.coefs <- dim(DM)[2]

  ## Null model: no covariates, hence no coefficients to estimate.
  if (n.coefs == 0) {
    return(0)
  }

  ## Default MCMC options: the package default suffices for averaged
  ## coefficients; more samples are drawn when they are returned separately.
  if (sep == FALSE) {
    mcmc.obj <- McmcOptions()
  } else {
    mcmc.obj <- McmcOptions(samples = 200)
  }

  ## An explicitly supplied 'mcmc' argument overrides the default above.
  if (methods::hasArg(mcmc)) mcmc.obj <- mcmc

  model.bma <- sampleBma(model.listpart, mcmc = mcmc.obj, verbose = FALSE)

  ## For each posterior sample of the fitted values, back out the implied
  ## (shrunken) coefficients via a least-squares fit on the design matrix.
  all.coefs <- lapply(seq_len(ncol(model.bma$samples@fitted)), function(i) {
    if (sum(is.na(model.bma$samples@fitted[, i])) > 0) {
      print("This iteration from sampleBMA didn't work, return NA!")
      ## Bug fix: return one NA per COEFFICIENT. The original returned
      ## rep(NA, length(fitted[, i])), i.e. one NA per observation, which
      ## made the element lengths inconsistent and corrupted the
      ## array/rowMeans aggregation below.
      return(rep(NA_real_, n.coefs))
    }
    result <- .lm.fit(x = as.matrix(DM), y = model.bma$samples@fitted[, i])$coefficients
    return(result)
  })

  if (sep == FALSE) {
    ## Average the coefficients over the posterior samples (NAs from failed
    ## iterations are dropped by na.rm = TRUE).
    if (n.coefs > 1) {
      arr <- array(unlist(all.coefs), dim = c(length(all.coefs[[1]]), length(all.coefs)))
      ret.all.coefs <- rowMeans(arr, na.rm = TRUE)
    } else {
      ret.all.coefs <- mean(unlist(all.coefs), na.rm = TRUE)
    }

    names(ret.all.coefs) <- colnames(DM)

    return(ret.all.coefs)
  } else if (sep == TRUE) {
    ## Return the per-sample coefficient vectors unaggregated.
    return(all.coefs)
  }
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[plotCurveEstimate.R] by DSB Mit 23/01/2013 18:14 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Plot predictor curve estimates for a given samples object, produced by sampleGlm. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 07/01/2010 copy and modify from package bfp (no method, add Roxygen chunks etc) |
|
| 12 |
## 25/05/2010 expect input of S4 class "GlmBayesMfpSamples"; |
|
| 13 |
## change expected layout of samples matrices (now nParameters x |
|
| 14 |
## nSamples) |
|
| 15 |
## 03/08/2010 rug must be painted after the matplot call |
|
| 16 |
##################################################################################### |
|
| 17 | ||
| 18 |
##' @include hpds.R |
|
| 19 |
NULL |
|
| 20 | ||
| 21 |
##' Function for plotting a fractional polynomial curve estimate |
|
| 22 |
##' |
|
| 23 |
##' Plot a fractional polynomial curve estimate using samples from a single |
|
| 24 |
##' GLM / Cox model or a model average. |
|
| 25 |
##' |
|
| 26 |
##' @param samples an object of class \code{\linkS4class{GlmBayesMfpSamples}},
|
|
| 27 |
##' produced by \code{\link{sampleGlm}} and \code{\link{sampleBma}}.
|
|
| 28 |
##' @param termName string denoting an FP term, as written by the |
|
| 29 |
##' \code{\link[=as.data.frame.GlmBayesMfp]{as.data.frame}} method
|
|
| 30 |
##' @param plevel credible level for pointwise HPD, and \code{NULL} means
|
|
| 31 |
##' no pointwise HPD (default: 0.95). The pointwise intervals are plotted in |
|
| 32 |
##' blue color. |
|
| 33 |
##' @param slevel credible level for simultaneous credible band (SCB), |
|
| 34 |
##' \code{NULL} means no SCB (defaults to \code{plevel}). The simultaneous
|
|
| 35 |
##' intervals are plotted in green color. |
|
| 36 |
##' @param plot if \code{FALSE}, only return values needed to produce the
|
|
| 37 |
##' plot, but do not plot (default is \code{TRUE}, so a plot is made)
|
|
| 38 |
##' @param rug add a rug to the plot? (default: \code{FALSE})
|
|
| 39 |
##' @param addZeros include zero samples for models where the covariate is not |
|
| 40 |
##' included? (default: \code{FALSE}) If \code{TRUE}, this changes the
|
|
| 41 |
##' interpretation of the samples, and therefore curve estimates based on these |
|
| 42 |
##' samples: it is no longer conditional on inclusion of the covariate, but |
|
| 43 |
##' marginally over all models, also those not including the covariate. |
|
| 44 | ||
| 45 |
##' @param \dots further arguments for plotting with \code{\link{matplot}}
|
|
| 46 |
##' @return a list of various plotting information: |
|
| 47 |
##' \item{original}{grid on the original covariate scale}
|
|
| 48 |
##' \item{grid}{grid on the transformed scale}
|
|
| 49 |
##' \item{mean}{pointwise mean curve values}
|
|
| 50 |
##' \item{plower}{lower boundaries for pointwise HPD}
|
|
| 51 |
##' \item{pupper}{upper boundaries for pointwise HPD}
|
|
| 52 |
##' \item{slower}{lower boundaries for SCB}
|
|
| 53 |
##' \item{supper}{upper boundaries for SCB}
|
|
| 54 |
##' \item{obsVals}{observed values of the covariate on the original scale}
|
|
| 55 |
##' \item{partialResids}{not implemented: partial residuals}
|
|
| 56 |
##' \item{transform}{vector of shift and scale parameter}
|
|
| 57 |
##' |
|
| 58 |
##' @keywords regression |
|
| 59 |
##' |
|
| 60 |
##' @importFrom graphics abline curve matplot par |
|
| 61 |
##' @importFrom methods is |
|
| 62 |
##' |
|
| 63 |
##' @export |
|
| 64 |
plotCurveEstimate <-
  function(samples,
           termName,
           plevel = 0.95,
           slevel = plevel,
           plot = TRUE,
           rug = FALSE,
           addZeros = FALSE,
           ...) {
    ## check class of "samples"
    stopifnot(is(samples, "GlmBayesMfpSamples"))

    ## extract samples and its attributes (they might be overwritten by cbind
    ## below)
    ## NOTE: a missing termName yields NULL from the list extraction, which
    ## is caught just below (attributes(NULL) is harmlessly NULL).
    mat <- samples@bfpCurves[[termName]]
    attrs <- attributes(mat)

    ## check that there are samples for this covariate
    if (is.null(mat)) {
      stop("There were no samples which include ", termName, " in this model sample!\n")
    }
    ## add zeros? todo: this should also work if no samples include this covariate!
    ## Padding with zero columns switches the interpretation from
    ## "conditional on inclusion" to "marginal over all models".
    if (addZeros) {
      mat <- cbind(
        mat,
        matrix(
          data = 0,
          nrow = nrow(mat),
          ncol = (samples@nSamples - ncol(mat))
        )
      )
    }

    ## start return list
    ret <- list()

    ## x values: 'grid' is on the transformed (shifted/scaled) scale,
    ## 'original' back-transforms via x = g * scale - shift, the inverse of
    ## (x + shift) / scale.
    ret$grid <- g <- as.vector(attrs$scaledGrid)
    tr <- samples@shiftScaleMax[termName, c("shift", "scale")]
    ret$original <- g * tr[2] - tr[1]

    ## compute pwise data: posterior mean curve over the samples (columns)
    ret$mean <- rowMeans(mat, na.rm = TRUE)

    ## pointwise HPD interval at each grid point (row), if requested
    if (!is.null(plevel)) {
      plowerUpper <- apply(mat, 1, empiricalHpd, level = plevel)
      ret$plower <- plowerUpper[1, ]
      ret$pupper <- plowerUpper[2, ]
    }

    ## simultaneous credible band around the mean
    if (!is.null(slevel)) {
      bandData <- scrHpd(mat, level = slevel, mode = ret$mean)
      ret$slower <- bandData[, "lower"]
      ret$supper <- bandData[, "upper"]
    }

    ## todo: add generalized partial residuals?

    ## ## partial residuals, attention because of possible ties between observed grid values in data!
    ## resids <- residuals (model)

    ## indices of the observed covariate values within the grid
    pos <- attrs$whereObsVals

    ## partialResids <- ret$mean[pos] + resids
    partialResids <- NULL

    if (plot) {
      ## determine plotting arguments for matlines; user-supplied ... entries
      ## take precedence, defaults are filled in only where missing
      matplotList <- list(...)
      if (is.null(matplotList$xlab)) {
        matplotList$xlab <- termName
      }
      if (is.null(matplotList$ylab)) {
        ## build a plotmath annotation describing the transform that was
        ## applied to the covariate (only when shift/scale are non-trivial)
        front <- paste("Average partial predictor g(", termName, ")", sep = "")
        if (any(tr != c(0, 1))) {
          middle <- " after the transform "
          back <-
            if (tr[1] != 0) {
              if (tr[2] != 1) {
                paste(termName, "%<-% (", termName, " + ", tr[1], ") %/% ", tr[2])
              } else {
                paste(termName, "%<-%", termName, " + ", tr[1])
              }
            } else {
              paste(termName, "%<-%", termName, "%/%", tr[2])
            }
          ## substitute() splices the strings and the parsed plotmath
          ## fragment into a single expression for the axis label
          annotation <- substitute(
            expression(paste(f, m, b)),
            list(
              f = front,
              m = middle,
              b = parse(text = back)[[1]]
            )
          )
        } else {
          annotation <- front
        }
        matplotList$ylab <- eval(annotation)
      }
      if (is.null(matplotList$lty)) {
        matplotList$lty <- 1
      }
      if (is.null(matplotList$col)) {
        ## mean = black, pointwise HPD = blue, simultaneous band = green
        matplotList$col <- c("black", "blue", "blue", "green", "green")
      }
      if (is.null(matplotList$type)) {
        matplotList$type <- "l"
      }
      ## y holds all computed curves except the two x-axis entries
      matplotList$x <- ret$original
      matplotList$y <- as.data.frame(ret[-match(c("original", "grid"), names(ret))])

      if (is.null(matplotList$ylim)) {
        matplotList$ylim <- range(c(partialResids, matplotList$y))
      }

      ## and plot:

      ## first the points
      ret$obsVals <- ret$original[pos]
      ret$partialResids <- partialResids
      ## plot(ret$obsVals, ret$partialResids,
      ##      type="p",
      ##      xlab=matplotList$xlab,
      ##      ylab=matplotList$ylab,
      ##      ylim=matplotList$ylim,
      ##      cex = 0.5,
      ##      col = "gray")

      ## then the curves, so that they are not over painted over by points
      ## matplotList$add <- TRUE
      do.call(matplot, matplotList)

      ## possibly the rug
      ## NOTE(review): the local 'rug' flag shadows graphics::rug; the call
      ## below still resolves to the function because R skips non-function
      ## bindings when looking up a name in call position.
      rug <- as.logical(rug)
      if (isTRUE(rug)) {
        rug(jitter(ret$obsVals), col = "gray")
      }
    }
    ## shift/scale parameters, returned so callers can reproduce the transform
    ret$transform <- tr

    ## return invisibly: the function is usually called for its plot
    invisible(ret)
  }
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[getFamily.R] by DSB Mit 06/02/2013 10:54 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Helper for glmBayesMfp which extracts an S3 family object with additional elements |
|
| 9 |
## from an option. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 12/02/2010 split from glmBayesMfp.R file |
|
| 13 |
## 12/05/2010 add cauchit link, remove inverse link from the accepted links |
|
| 14 |
## because we cannot deal with that in the generalized g-prior |
|
| 15 |
## framework. |
|
| 16 |
## 06/02/2013 check for missing family, since there is no longer a default |
|
| 17 |
## in glmBayesMfp |
|
| 18 |
##################################################################################### |
|
| 19 | ||
| 20 | ||
| 21 |
##' Helper function for glmBayesMfp: Extracts an S3 family object |
|
| 22 |
##' |
|
| 23 |
##' Extracts an S3 family object, which contains (at least) the usual elements
|
| 24 |
##' "family", "link", "linkfun", "linkinv", "variance", "mu.eta", "dev.resids", |
|
| 25 |
##' plus the additional elements: |
|
| 26 |
##' "phi" which includes just the dispersion parameter to be used, |
|
| 27 |
##' and "simulate" which can generate random variates for a given linear predictor and weight vector |
|
| 28 |
##' (this function of course also uses the "phi" value) |
|
| 29 |
##' |
|
| 30 |
##' @param family the family argument passed to \code{\link{glmBayesMfp}}
|
|
| 31 |
##' @param phi the dispersion argument passed to \code{\link{glmBayesMfp}}
|
|
| 32 |
##' |
|
| 33 |
##' @return The returned family object also includes a custom \sQuote{init} function,
|
|
| 34 |
##' which takes the response vector (or matrix) (\sQuote{y}) and the corresponding weight vector
|
|
| 35 |
##' (\sQuote{weights}), processes them to response vector \sQuote{y} and possibly altered weights
|
|
| 36 |
##' \sQuote{weights}, and includes starting values \sQuote{mustart} for the IWLS algorithm.
|
|
| 37 |
##' For example, here the binomial special case of two-column-response matrix is treated exactly in |
|
| 38 |
##' the same way as in \code{\link{glm}}.
|
|
| 39 |
##' |
|
| 40 |
##' @keywords internal |
|
| 41 |
getFamily <- function(family,
                      phi) {
  ## family must be given explicitly (no default in glmBayesMfp since 2013)
  if (missing(family)) {
    stop(simpleError("Option family must be specified!"))
  }

  ## check that dispersion is positive etc.
  phi <- as.numeric(phi)
  stopifnot(
    identical(length(phi), 1L),
    phi > 0
  )

  ## then process the family argument:

  ## convert character to function
  if (is.character(family)) {
    family <- get(family, mode = "function")
  }
  ## call function to get object
  if (is.function(family)) {
    family <- family()
  }
  ## check that family has correct form
  stopifnot(
    inherits(family, "family"),
    all(
      c("family", "link", "linkfun", "linkinv", "variance", "mu.eta", "dev.resids") %in%
        names(family)
    )
  )

  ## check here if family and link are supported.
  ## (we only want to use links which always map into the valid mu space)

  ## first check the family,
  ## and along the way, we force the dispersion parameter
  ## and also choose which links are supported for that family.
  ## NOTE: the simulate closures reference 'family$linkinv' (and 'phi'),
  ## which are looked up at call time in this function's environment.
  if (identical(family$family, "binomial")) {
    okLinks <- c("logit", "cauchit", "probit", "cloglog")
    ## binomial dispersion is fixed at 1 regardless of the 'phi' argument
    family$phi <- 1
    family$simulate <- function(eta, weights) {
      ## check if weights are non-negative integers
      stopifnot(
        all((weights %% 1L) == 0L),
        all(weights >= 0L)
      )

      ## then simulate one observation for each lin pred value
      rbinom(
        n = length(eta),
        size = weights,
        prob = family$linkinv(eta)
      )
    }
  } else if (identical(family$family, "gaussian")) {
    okLinks <- c("log", "identity")
    ## gaussian: user-specified dispersion is honored
    family$phi <- phi
    family$simulate <- function(eta, weights) {
      rnorm(
        n = length(eta),
        mean = family$linkinv(eta),
        sd = sqrt(phi / weights)
      )
    }
  } else if (identical(family$family, "poisson")) {
    okLinks <- c("log")
    ## poisson dispersion is fixed at 1 as well
    family$phi <- 1
    family$simulate <- function(eta, weights) {
      ## here we do not use the weights at all!
      rpois(
        n = length(eta),
        lambda = family$linkinv(eta)
      )
    }
  } else {
    stop(simpleError(paste(
      "family",
      family$family,
      "not supported"
    )))
  }


  ## warn if phi has changed (e.g. phi != 1 passed for binomial/poisson)
  if (family$phi != phi) {
    warning(simpleWarning(paste(
      "dispersion was changed from",
      phi,
      "to",
      family$phi
    )))
  }

  ## then check the link
  if (!(family$link %in% okLinks)) {
    stop(simpleError(paste(
      "link",
      family$link,
      "not supported for family",
      family$family
    )))
  }

  ## now attach a nicer initialize function, instead of just the expression e.g. in
  ## binomial()$initialize
  family$init <- function(y, # the response vector or matrix
                          weights, # the weights vector
                          nobs = length(weights)) { # number of observations

    ## create dummy variables which are sometimes needed in the
    ## initializer expression
    etastart <- start <- mustart <- NULL

    ## evaluate the initializer expression, which modifies y and weights
    ## and computes mustart values
    ## NOTE: 'family$initialize' is a quoted expression (NSE); eval() runs
    ## it in this function's frame so it can read/write y, weights, nobs
    ## and mustart directly.
    eval(family$initialize)

    ## return these
    return(list(
      y = y,
      weights = weights,
      linPredStart = family$linkfun(mustart)
    ))
    ## use start values on the linear predictor scale for ease and clarity in C++.
  }

  return(family)
}
| 1 |
#' Prediction methods for CoxTBF objects |
|
| 2 |
#' |
|
| 3 |
#' Predicts survival probabilities at given times. Compatible with predictSurvProb functions |
|
| 4 |
#' required by \code{pec} package.
|
|
| 5 |
#' |
|
| 6 |
#' @param object a model fitted with \code{\link{coxTBF}}
|
|
| 7 |
#' @param newdata a data frame with the same variables as the original data used to fit the object |
|
| 8 |
#' @param times a vector of times to predict survival probability for |
|
| 9 |
#' @param ... not used. |
|
| 10 |
#' |
|
| 11 |
#' @return A data frame of survival probabilities with rows for each row of newdata and columns for each time. |
|
| 12 |
#' @export |
|
| 13 |
#' |
|
| 14 |
predict.TBFcox <- function(object, newdata = object$data, times, ...) {
  ## Survival probability prediction for a fitted TBF Cox model; interface
  ## compatible with the predictSurvProb functions used by the pec package.
  ## (Design-matrix handling adapted from sampleGlm.R in glmBfp.)
  fit <- object$model.object
  cfg <- fit[[1]]$configuration

  ## transform the new covariate data the same way as the training data
  newX <- constructNewdataMatrix(
    object = fit,
    newdata = newdata
  )

  ## clone the fitted object and swap in the new covariate matrix, so that
  ## getDesignMatrix builds the design for the new observations
  newFit <- fit
  attr(newFit, "data") <-
    list(
      x = newX,
      xCentered = scale(newX, center = TRUE, scale = FALSE)
    )

  ## uncentered design matrices for the new and the training data
  designNew <- getDesignMatrix(
    modelConfig = cfg,
    object = newFit,
    intercept = FALSE,
    center = FALSE
  )

  designOld <- getDesignMatrix(
    modelConfig = cfg,
    object = fit,
    intercept = FALSE,
    center = FALSE
  )

  ## center the new design with the TRAINING column means, matching the
  ## centering that was in effect when the coefficients were estimated
  designNew <- sweep(designNew, 2, colMeans(designOld), FUN = "-")

  ## linear predictor and the resulting survival probabilities
  eta <- designNew %*% object$coefs

  return(object$survival(times, eta))
}
|
| 67 | ||
| 68 | ||
| 69 | ||
| 70 | ||
| 71 | ||
| 72 |
#' Prediction methods for CoxTBF objects with separate estimates
#'
#' Predicts survival probabilities at given times. Compatible with predictSurvProb functions
#' required by \code{pec} package. Predicts objects fitted with \code{sep=TRUE},
#' i.e. objects carrying one coefficient set and one survival function per
#' separate estimate; the k per-estimate predictions are averaged.
#'
#' @param object a model fitted with \code{\link{coxTBF}}
#' @param newdata a data frame with the same variables as the original data used to fit the object
#' @param times a vector of times to predict survival probability for
#' @param ... not used.
#'
#' @return A data frame of survival probabilities with rows for each row of newdata and columns for each time.
#' @export
#'
predict.TBFcox.sep <- function(object, newdata, times, ...) {
  print("predictSurvProb TBF.Cox.sep")
  print(paste(
    "Train N =", nrow(attr(object$model.object, "data")$x),
    ". Predict N = ", nrow(newdata)
  ))
  # From sampleGlm.R from glmBfp packages by Sabanes Bove
  obj <- object$model.object
  config <- obj[[1]]$configuration

  ## get new data matrix
  tempX <- constructNewdataMatrix(
    object = obj,
    newdata = newdata
  )

  ## copy old GlmBayesMfp object and insert the new data matrix, so that
  ## getDesignMatrix() evaluates the model terms on the new observations
  tempObj <- obj
  attr(tempObj, "data") <-
    list(
      x = tempX,
      xCentered = scale(tempX, center = TRUE, scale = FALSE)
    )

  ## uncentered design matrices for the new and the original data
  newDesignUC <- getDesignMatrix(
    modelConfig = config,
    object = tempObj,
    intercept = FALSE,
    center = FALSE
  )

  oldDesignUC <- getDesignMatrix(
    modelConfig = config,
    object = obj,
    intercept = FALSE,
    center = FALSE
  )

  ## center the new design matrix by the *training* column means, matching the
  ## centering used when the coefficients were estimated
  ## (vectorized sweep() replaces the former per-column loop)
  oldMeans <- colMeans(oldDesignUC)
  newDesignUC <- sweep(newDesignUC, 2L, oldMeans, FUN = "-")

  ## linear predictor samples, one matrix per separate estimate
  linPredSamples <- lapply(object$coefs, function(coefs) newDesignUC %*% coefs)

  k <- length(linPredSamples)

  ## survival probabilities per separate estimate
  ## (seq_len(k) instead of 1:k, which misbehaves for k == 0)
  ret.survival <- lapply(seq_len(k), function(i) {
    object$survival[[i]](times, linPredSamples[[i]])
  })

  ## average the k per-estimate predictions
  ret.surv2 <- Reduce(`+`, ret.survival) / k

  return(ret.surv2)
}
|
| 149 | ||
| 150 | ||
| 151 | ||
| 152 | ||
| 153 |
#' Prediction methods for CoxTBF objects for BMA models
#'
#' Predicts survival probabilities at given times. Compatible with predictSurvProb functions
#' required by \code{pec} package. Predicts BMA objects: each model's baseline
#' survival is raised to its exp(linear predictor) and the per-model results are
#' weighted by the normalized posterior model probabilities.
#'
#' @param object a model fitted with \code{\link{coxTBF}}
#' @param newdata a data frame with the same variables as the original data used to fit the object
#' @param times a vector of times to predict survival probability for
#' @param ... not used.
#'
#' @return A matrix of survival probabilities with rows for each row of newdata and columns for each time.
#' @export
#'
predict.TBFcox.BMA <- function(object, newdata, times, ...) {
  ## normalized posterior model probabilities = BMA weights
  post <- object$probability / sum(object$probability)
  time <- object$time

  N <- nrow(newdata)
  k <- length(object$model.object)
  t <- length(times)

  ## Big.Matrix[m, j]: baseline survival of model m at times[j]
  ## LP.Matrix[n, m]:  exp(linear predictor) of observation n under model m
  Big.Matrix <- matrix(0, nrow = k, ncol = t)
  LP.Matrix <- matrix(0, nrow = N, ncol = k)

  print(paste("Predict BMA", "N=", N, "k=", k, "t=", t))
  # Populate (exp) LP.Matrix
  # This can probably be made faster since I think some of the matrices are identical

  print("Making Model Matrices")
  for (i in seq_along(object$model.object)) {
    # From sampleGlm.R from glmBfp packages by Sabanes Bove
    obj <- object$model.object[i]
    config <- obj[[1]]$configuration
    ## get new data matrix
    tempX <- constructNewdataMatrix(
      object = obj,
      newdata = newdata
    )

    ## copy old GlmBayesMfp object and insert the new data matrix, so that
    ## getDesignMatrix() evaluates the model terms on the new observations
    tempObj <- obj
    attr(tempObj, "data") <-
      list(
        x = tempX,
        xCentered = scale(tempX, center = TRUE, scale = FALSE)
      )

    ## uncentered design matrices for the new and the original data
    newDesignUC <- getDesignMatrix(
      modelConfig = config,
      object = tempObj,
      intercept = FALSE,
      center = FALSE
    )

    oldDesignUC <- getDesignMatrix(
      modelConfig = config,
      object = obj,
      intercept = FALSE,
      center = FALSE
    )

    ## center by the *training* column means (same centering as estimation)
    oldMeans <- colMeans(oldDesignUC)
    newDesignUC <- newDesignUC - rep(oldMeans, each = nrow(newDesignUC))

    ## the linear predictor samples (after exp transform)
    LP.Matrix[, i] <- exp(newDesignUC %*% object$coefs[[i]])
  }

  print("Making Big Matrix")
  # Populate Big.Matrix with the baseline survival at the requested times.
  # seq_len(t) (not 1:t) is safe when `times` is empty. Times beyond the
  # observed range give NA; times before the first observed time give 1
  # (previously max() of an empty index set raised an error there).
  for (i in seq_len(t)) {
    if (times[i] > max(time) + 1e-06) {
      Big.Matrix[, i] <- NA
    } else {
      atRisk <- which(time <= times[i] + 1e-06)
      Big.Matrix[, i] <- if (length(atRisk) > 0L) {
        object$survfit[, max(atRisk)]
      } else {
        1 # before the first observed event time, survival is still 1
      }
    }
  }

  print("Making Predictions")
  ## weighted average over models: sum_m post[m] * S_m(t)^expLP[n, m]
  preds <- predBMAcpp(Big.Matrix, LP.Matrix, post)

  print("Finished Predictions")
  preds <- t(preds)
  ## bug fix: the old `if (is.data.frame(preds))` guard never fired, because
  ## t() always returns a matrix — label the time columns unconditionally
  colnames(preds) <- times
  return(preds)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[sampleGlm.R] by DSB Mon 26/08/2013 15:43 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Produce posterior samples from one GLM returned by glmBayesMfp, using an MCMC sampler. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 10/12/2009 file creation |
|
| 12 |
## 05/01/2010 construct the random number generator for the marginal z approximation |
|
| 13 |
## *inside* the helper function, so that we can use the normalizing constant |
|
| 14 |
## computed by Runuran instead of doing another "integrate" call. |
|
| 15 |
## 06/01/2010 add postprocessing of the coefficients samples |
|
| 16 |
## 16/02/2010 correct title of getLogMargLikEstimate man page |
|
| 17 |
## 17/02/2010 split off getMarginalZ into separate code file |
|
| 18 |
## 18/02/2010 split off getLogMargLikEstimate into separate code file |
|
| 19 |
## 15/03/2010 add check for class of mcmc argument ... |
|
| 20 |
## 17/03/2010 ... but do it correctly (the class is called McmcOptions and not Mcmc) |
|
| 21 |
## 08/04/2010 Add description of the return list (S3 class "GlmBayesMfpSamples"). |
|
| 22 |
## 17/05/2010 Add "useOpenMP" option, which makes it easier |
|
| 23 |
## to switch than setting the environment variable OMP_NUM_THREADS |
|
| 24 |
## (especially when using R on the server via Emacs Tramp...) |
|
| 25 |
## 24/05/2010 return samples on the response scale ("response") instead of their
|
|
| 26 |
## averages ("fitted"), because this averaging can better be done in
|
|
| 27 |
## a "fitted" method and we need the raw samples in sampleBma.R |
|
| 28 |
## 25/05/2010 different structure of return list; |
|
| 29 |
## layout of bfp and uc samples is now nPar x nSamples. |
|
| 30 |
## 28/05/2010 - remove argument "weights" which is no longer needed here, |
|
| 31 |
## as we only sample the linear predictors for new observations. |
|
| 32 |
## - rename "response" to "fitted", which shall also contain the |
|
| 33 |
## *linear predictors* for the fitted data (and not the *means*). |
|
| 34 |
## 02/07/2010 extend the functionality to the case with fixed z (option "fixedZ") |
|
| 35 |
## 08/07/2010 check if the models were fit by empirical Bayes. In that |
|
| 36 |
## case use the fixed z option ! |
|
| 37 |
## 26/07/2010 Do not drop dimensions when selecting UC coefficients samples |
|
| 38 |
## 23/11/2012 modifications to accommodate the TBF methodology |
|
| 39 |
## 03/12/2012 modifications to accommodate the Cox models |
|
| 40 |
## 24/01/2013 adapt for fixedg option |
|
| 41 |
## 03/07/2013 comment on offsets |
|
| 42 |
##################################################################################### |
|
| 43 | ||
| 44 |
##' @include helpers.R |
|
| 45 |
##' @include McmcOptions-class.R |
|
| 46 |
##' @include McmcOptions-methods.R |
|
| 47 |
##' @include GlmBayesMfpSamples-class.R |
|
| 48 |
##' @include getMarginalZ.R |
|
| 49 |
##' @include getLogMargLikEstimate.R |
|
| 50 |
NULL |
|
| 51 | ||
| 52 |
##' Produce posterior samples from one GLM / Cox model |
|
| 53 |
##' |
|
| 54 |
##' Based on the result list from \code{\link{glmBayesMfp}}, for the first model
|
|
| 55 |
##' in the list MCMC samples are produced. In parallel to the sampling of |
|
| 56 |
##' coefficients and FP curve points, optionally the marginal likelihood of the |
|
| 57 |
##' model is estimated with MCMC samples. This provides a check of the |
|
| 58 |
##' integrated Laplace approximation used in the model sampling. If TBF |
|
| 59 |
##' methodology is used, then no MCMC is necessary, instead ordinary Monte Carlo |
|
| 60 |
##' samples from an approximate posterior distribution are obtained. |
|
| 61 |
##' |
|
| 62 |
##' @param object the \code{GlmBayesMfp} object, from which only the first model
|
|
| 63 |
##' will be processed (at least for now \ldots) |
|
| 64 |
##' @param mcmc MCMC options object with class \code{\linkS4class{McmcOptions}}.
|
|
| 65 |
##' If TBF is used, each sample is accepted, and the number of samples is given |
|
| 66 |
##' by \code{\link{sampleSize}}(\code{mcmc}).
|
|
| 67 |
##' @param estimateMargLik shall the marginal likelihood be estimated in |
|
| 68 |
##' parallel? (default) Only has an effect if full Bayes and not TBF is used. |
|
| 69 |
##' @param gridList optional list of appropriately named grid vectors for FP |
|
| 70 |
##' evaluation. Default is length (\code{gridSize} - 2) grid per covariate
|
|
| 71 |
##' additional to the observed values (two are at the endpoints) |
|
| 72 |
##' @param gridSize see above (default: 203) |
|
| 73 |
##' @param newdata new covariate data.frame with exactly the names (and |
|
| 74 |
##' preferably ranges) as before (default: no new covariate data) Note that |
|
| 75 |
##' there is no option for offsets for new data at the moment. Just add the |
|
| 76 |
##' offsets to the \code{predictions} slot of \code{samples} in the return list
|
|
| 77 |
##' yourself. |
|
| 78 |
##' @param fixedZ either \code{NULL} (default) or a (single) fixed z value to
|
|
| 79 |
##' be used, in order to sample from the conditional posterior given this z. |
|
| 80 |
##' If \code{object} was constructed by the empirical Bayes machinery,
|
|
| 81 |
##' this will default to the estimated z with maximum conditional marginal |
|
| 82 |
##' likelihood. If \code{object} was constructed with the option \code{fixedg},
|
|
| 83 |
##' then the fixed value will be used by default. |
|
| 84 |
##' @param marginalZApprox method for approximating the marginal density of the |
|
| 85 |
##' log covariance factor z, see \code{\link{getMarginalZ}} for the details
|
|
| 86 |
##' (default: same preference list as in \code{\link{getMarginalZ}})
|
|
| 87 |
##' If TBF are used in conjunction with incomplete inverse gamma hyperprior on |
|
| 88 |
##' g = exp(z), then the posterior distribution of g is again of this form. |
|
| 89 |
##' Therefore this option does not have any effect in that case, because the |
|
| 90 |
##' samples are directly obtained from that posterior distribution. |
|
| 91 |
##' @param verbose should information on computation progress be given? |
|
| 92 |
##' (default) |
|
| 93 |
##' @param debug print debugging information? (not default) |
|
| 94 |
##' @param useOpenMP shall OpenMP be used to accelerate the computations? |
|
| 95 |
##' (default) |
|
| 96 |
##' @param correctedCenter If TRUE predict new data based on the centering |
|
| 97 |
##' of the original data. |
|
| 98 |
##' |
|
| 99 |
##' @return Returns a list with the following elements: |
|
| 100 |
##' \describe{
|
|
| 101 |
##' \item{samples}{an object of S4 class
|
|
| 102 |
##' \code{\linkS4class{GlmBayesMfpSamples}}}
|
|
| 103 |
##' \item{coefficients}{samples of all original coefficients in the model
|
|
| 104 |
##' (nCoefs x nSamples)} |
|
| 105 |
##' \item{acceptanceRatio}{proportion of accepted Metropolis-Hastings proposals}
|
|
| 106 |
##' \item{logMargLik}{if \code{estimateMargLik} is \code{TRUE}, this list is
|
|
| 107 |
##' included: it contains the elements \code{numeratorTerms} and
|
|
| 108 |
##' \code{denominatorTerms} for the numerator and denominator samples of the
|
|
| 109 |
##' Chib Jeliazkov marginal likelihood estimate, |
|
| 110 |
##' \code{highDensityPointLogUnPosterior} is the log unnormalized posterior
|
|
| 111 |
##' density at the fixed parameter and the resulting \code{estimate} and
|
|
| 112 |
##' \code{standardError}.}
|
|
| 113 |
##' } |
|
| 114 |
##' |
|
| 115 |
##' @importFrom Rcpp evalCpp |
|
| 116 |
##' |
|
| 117 |
##' @export |
|
| 118 |
##' @keywords models regression |
|
| 119 |
sampleGlm <- |
|
| 120 |
function(object, |
|
| 121 |
mcmc = McmcOptions(), |
|
| 122 |
estimateMargLik = TRUE, |
|
| 123 |
gridList = list(), |
|
| 124 |
gridSize = 203L, |
|
| 125 |
newdata = NULL, |
|
| 126 |
fixedZ = NULL, |
|
| 127 |
marginalZApprox = NULL, |
|
| 128 |
verbose = TRUE, |
|
| 129 |
debug = FALSE, |
|
| 130 |
useOpenMP = TRUE, |
|
| 131 |
correctedCenter = FALSE) {
|
|
| 132 |
## check the object |
|
| 133 | 41x |
if (!inherits(object, "GlmBayesMfp")) {
|
| 134 | ! |
stop(simpleError("object must be of class GlmBayesMfp"))
|
| 135 |
} |
|
| 136 | 41x |
if (!(length(object) >= 1L)) {
|
| 137 | ! |
stop(simpleError("there has to be at least one model in the object"))
|
| 138 |
} |
|
| 139 | ||
| 140 |
## other checks |
|
| 141 | 41x |
stopifnot( |
| 142 | 41x |
is.bool(estimateMargLik), |
| 143 | 41x |
is.bool(verbose), |
| 144 | 41x |
is.bool(debug), |
| 145 | 41x |
is(mcmc, "McmcOptions"), |
| 146 | 41x |
is.bool(useOpenMP) |
| 147 |
) |
|
| 148 | ||
| 149 |
## coerce newdata to data frame |
|
| 150 | 41x |
newdata <- as.data.frame(newdata) |
| 151 | 41x |
nNewObs <- nrow(newdata) |
| 152 | ||
| 153 |
## ## covariates matrix for newdata (if there is any): |
|
| 154 |
## newX <- |
|
| 155 |
## if(nNewObs > 0L) |
|
| 156 |
## {
|
|
| 157 |
## constructNewdataMatrix(object=object, |
|
| 158 |
## newdata=newdata) |
|
| 159 |
## } |
|
| 160 |
## else |
|
| 161 |
## NULL |
|
| 162 | ||
| 163 |
## get the old attributes of the object |
|
| 164 | 41x |
attrs <- attributes(object) |
| 165 | ||
| 166 |
## is this a GLM or a Cox model? |
|
| 167 | 41x |
doGlm <- attrs$distribution$doGlm |
| 168 | ||
| 169 |
## take only the first model |
|
| 170 | 41x |
model <- object[[1L]] |
| 171 | 41x |
config <- model$configuration |
| 172 | 41x |
info <- model$information |
| 173 | ||
| 174 |
## get the (centered) design matrix of the model for the original data |
|
| 175 | 41x |
design <- getDesignMatrix( |
| 176 | 41x |
modelConfig = config, |
| 177 | 41x |
object = object, |
| 178 | 41x |
intercept = doGlm |
| 179 |
) |
|
| 180 |
## the model dimension (including the intercept for GLMs) |
|
| 181 | 41x |
modelDim <- ncol(design) |
| 182 | ||
| 183 |
## check if the model is the null model |
|
| 184 | 41x |
isNullModel <- identical( |
| 185 | 41x |
modelDim, |
| 186 | 41x |
ifelse(doGlm, 1L, 0L) |
| 187 |
) |
|
| 188 | ||
| 189 |
## check / modify the fixedZ option |
|
| 190 | 41x |
if (is.null(fixedZ)) {
|
| 191 | 41x |
if (isNullModel) {
|
| 192 | ! |
fixedZ <- 0 |
| 193 | 41x |
} else if (attrs$searchConfig$useFixedg || attrs$searchConfig$empiricalBayes) {
|
| 194 | 3x |
fixedZ <- info$zMode |
| 195 |
} |
|
| 196 |
} |
|
| 197 | ||
| 198 | 41x |
if (useFixedZ <- !is.null(fixedZ)) {
|
| 199 | 3x |
stopifnot( |
| 200 | 3x |
is.numeric(fixedZ), |
| 201 | 3x |
is.finite(fixedZ), |
| 202 | 3x |
identical(length(fixedZ), 1L) |
| 203 |
) |
|
| 204 |
} else {
|
|
| 205 | 38x |
fixedZ <- 0 |
| 206 |
} |
|
| 207 | ||
| 208 |
## extract TBF and g-prior info |
|
| 209 | 41x |
tbf <- attrs$distribution$tbf |
| 210 | 41x |
gPrior <- attrs$distribution$gPrior |
| 211 | ||
| 212 |
## correct estimateMargLik option |
|
| 213 | 41x |
estimateMargLik <- estimateMargLik && (!tbf) |
| 214 | ||
| 215 |
## correct MCMC option |
|
| 216 | 41x |
if (tbf) {
|
| 217 | 40x |
mcmc <- McmcOptions( |
| 218 | 40x |
burnin = 0L, |
| 219 | 40x |
step = 1L, |
| 220 | 40x |
samples = sampleSize(mcmc) |
| 221 |
) |
|
| 222 |
} |
|
| 223 | ||
| 224 |
## compute the selected approximation to the marginal z posterior, |
|
| 225 |
## including the log density and the random number generator for it |
|
| 226 | 41x |
marginalz <- |
| 227 | 41x |
if (useFixedZ) {
|
| 228 |
## this is for the special case with a fixed z |
|
| 229 | 3x |
list( |
| 230 | 3x |
logDens = function(z) ifelse(z == fixedZ, 0, -Inf), |
| 231 | 3x |
gen = function(n = 1) rep.int(fixedZ, n) |
| 232 |
) |
|
| 233 |
} else {
|
|
| 234 | 38x |
if (tbf && is(gPrior, "IncInvGammaGPrior")) {
|
| 235 |
## analytical solution possible in this case! |
|
| 236 |
## compute posterior parameters of IncIG: |
|
| 237 | ! |
a <- gPrior@a + (modelDim - 1) / 2 |
| 238 | ! |
b <- gPrior@b + info$residualDeviance / 2 |
| 239 | ||
| 240 | ! |
list( |
| 241 | ! |
logDens = function(z) {
|
| 242 | ! |
logNormConst <- ifelse( |
| 243 | ! |
b > 0, |
| 244 | ! |
a * log(b) - pgamma(b, a, log.p = TRUE) - lgamma(a), |
| 245 | ! |
log(a) |
| 246 |
) |
|
| 247 | ! |
logNormConst - (a + 1) * log1p(exp(z)) - b / (1 + exp(z)) + z |
| 248 |
}, |
|
| 249 | ! |
gen = function(n = 1) {
|
| 250 | ! |
p <- runif(n = n) |
| 251 | ||
| 252 | ! |
ret <- |
| 253 | ! |
if (b > 0) {
|
| 254 | ! |
b / qgamma(p = (1 - p) * pgamma(b, a), shape = a) - 1 |
| 255 |
} else {
|
|
| 256 | ! |
(1 - p)^(-1 / a) - 1 |
| 257 |
} |
|
| 258 | ! |
return(ret) |
| 259 |
} |
|
| 260 |
) |
|
| 261 |
} else {
|
|
| 262 |
## the usual way: |
|
| 263 | 38x |
getMarginalZ(info, |
| 264 | 38x |
method = marginalZApprox, |
| 265 | 38x |
verbose = verbose |
| 266 |
) |
|
| 267 |
} |
|
| 268 |
} |
|
| 269 | ||
| 270 |
## pack the info in handy lists: |
|
| 271 | ||
| 272 |
## pack the options |
|
| 273 | 41x |
options <- list( |
| 274 | 41x |
mcmc = mcmc, |
| 275 | 41x |
estimateMargLik = estimateMargLik, |
| 276 | 41x |
verbose = verbose, |
| 277 | 41x |
debug = debug, |
| 278 | 41x |
isNullModel = isNullModel, |
| 279 | 41x |
useFixedZ = useFixedZ, |
| 280 | 41x |
fixedZ = as.double(fixedZ), |
| 281 | 41x |
useOpenMP = useOpenMP |
| 282 |
) |
|
| 283 | ||
| 284 |
## start the progress bar (is continued in the C++ code) |
|
| 285 | 41x |
if (verbose) {
|
| 286 | 32x |
cat("0%", rep("_", 100 - 6), "100%\n", sep = "")
|
| 287 |
} |
|
| 288 | ||
| 289 |
## then call C++ to do the rest: |
|
| 290 | 41x |
cppResults <- cpp_sampleGlm( |
| 291 | 41x |
model, |
| 292 | 41x |
attrs$data, |
| 293 | 41x |
attrs$fpInfos, |
| 294 | 41x |
attrs$ucInfos, |
| 295 | 41x |
attrs$fixInfos, |
| 296 | 41x |
attrs$distribution, |
| 297 | 41x |
attrs$searchConfig, |
| 298 | 41x |
options, |
| 299 | 41x |
marginalz |
| 300 |
) |
|
| 301 | ||
| 302 |
## start return list |
|
| 303 | 41x |
results <- list() |
| 304 | ||
| 305 |
## add info on tbf |
|
| 306 | 41x |
results$tbf <- tbf |
| 307 | ||
| 308 |
## compute acceptance ratio |
|
| 309 | 41x |
results$acceptanceRatio <- |
| 310 | 41x |
if (tbf) {
|
| 311 | 40x |
1 |
| 312 |
} else {
|
|
| 313 |
## we do Monte Carlo in the TBF case! |
|
| 314 | 1x |
cppResults$nAccepted / mcmc@iterations |
| 315 |
} |
|
| 316 | ||
| 317 |
## are we again verbose? |
|
| 318 | 41x |
if (verbose) {
|
| 319 | 32x |
if (tbf) {
|
| 320 | 31x |
cat("\nFinished MC simulation from approximate parameter posterior\n")
|
| 321 |
} else {
|
|
| 322 | 1x |
cat( |
| 323 | 1x |
"\nFinished MCMC simulation with acceptance ratio", |
| 324 | 1x |
round(results$acceptanceRatio, 3), |
| 325 | 1x |
"\n" |
| 326 |
) |
|
| 327 |
} |
|
| 328 |
} |
|
| 329 | ||
| 330 |
## compute log marginal likelihood estimate (which is only defined up to a constant, so |
|
| 331 |
## it can only be used for model ranking (?)) |
|
| 332 | 41x |
if (estimateMargLik) {
|
| 333 | 1x |
results$logMargLik <- |
| 334 | 1x |
with( |
| 335 | 1x |
cppResults, |
| 336 | 1x |
getLogMargLikEstimate( |
| 337 | 1x |
numeratorTerms = samples$margLikNumerator, |
| 338 | 1x |
denominatorTerms = samples$margLikDenominator, |
| 339 | 1x |
highDensityPointLogUnPosterior = highDensityPointLogUnPosterior |
| 340 |
) |
|
| 341 |
) |
|
| 342 | ||
| 343 |
## append original things |
|
| 344 | 1x |
results$logMargLik <- |
| 345 | 1x |
c( |
| 346 | 1x |
results$logMargLik, |
| 347 | 1x |
list( |
| 348 | 1x |
numeratorTerms = cppResults$samples$margLikNumerator, |
| 349 | 1x |
denominatorTerms = cppResults$samples$margLikDenominator, |
| 350 | 1x |
highDensityPointLogUnPosterior = cppResults$highDensityPointLogUnPosterior |
| 351 |
) |
|
| 352 |
) |
|
| 353 |
} |
|
| 354 | ||
| 355 | ||
| 356 |
## start post-processing. |
|
| 357 | ||
| 358 |
## abbreviation for the coefficients sample matrix (nCoefs x nSamples) |
|
| 359 | 41x |
simCoefs <- |
| 360 | 41x |
results$coefficients <- cppResults$samples$coefficients |
| 361 | ||
| 362 | ||
| 363 |
## so the number of samples is: |
|
| 364 | 41x |
nSamples <- ncol(simCoefs) |
| 365 | ||
| 366 |
## the linear predictor samples for the fitted data: |
|
| 367 | 41x |
fitted <- design %*% simCoefs |
| 368 | ||
| 369 |
# give some names |
|
| 370 | ||
| 371 | 41x |
rownames(results$coefficients) <- colnames(design) |
| 372 | 1x |
if (doGlm) rownames(results$coefficients)[1] <- "(Intercept)" |
| 373 | ||
| 374 |
## samples from the predictive distribution for new data, |
|
| 375 |
## if this is required |
|
| 376 | 41x |
predictions <- |
| 377 | 41x |
if (nNewObs) {
|
| 378 |
## get new data matrix |
|
| 379 | ! |
tempX <- constructNewdataMatrix( |
| 380 | ! |
object = object, |
| 381 | ! |
newdata = newdata |
| 382 |
) |
|
| 383 | ||
| 384 |
## copy old GlmBayesMfp object |
|
| 385 | ! |
tempObj <- object |
| 386 | ||
| 387 |
## correct model matrix in tempMod to new data matrix |
|
| 388 | ! |
attr(tempObj, "data") <- |
| 389 | ! |
list( |
| 390 | ! |
x = tempX, |
| 391 | ! |
xCentered = scale(tempX, center = TRUE, scale = FALSE) |
| 392 |
) |
|
| 393 | ||
| 394 | ! |
if (correctedCenter == FALSE) {
|
| 395 |
## so we can get the design matrix |
|
| 396 | ! |
newDesign <- getDesignMatrix( |
| 397 | ! |
modelConfig = config, |
| 398 | ! |
object = tempObj, |
| 399 | ! |
intercept = doGlm |
| 400 |
) |
|
| 401 | ||
| 402 |
## so the linear predictor samples are |
|
| 403 | ! |
linPredSamples <- newDesign %*% simCoefs |
| 404 | ! |
} else if (correctedCenter == TRUE) {
|
| 405 | ! |
newDesignUC <- getDesignMatrix( |
| 406 | ! |
modelConfig = config, |
| 407 | ! |
object = tempObj, |
| 408 | ! |
intercept = doGlm, |
| 409 | ! |
center = FALSE |
| 410 |
) |
|
| 411 | ||
| 412 | ! |
oldDesignUC <- getDesignMatrix( |
| 413 | ! |
modelConfig = config, |
| 414 | ! |
object = object, |
| 415 | ! |
intercept = doGlm, |
| 416 | ! |
center = FALSE |
| 417 |
) |
|
| 418 | ||
| 419 | ! |
oldMeans <- colMeans(oldDesignUC) |
| 420 | ||
| 421 | ! |
start <- ifelse(doGlm, 2, 1) # If there is an intercept we don't want to subtract mean |
| 422 | ! |
for (k in start:length(oldMeans)) {
|
| 423 | ! |
newDesignUC[, k] <- newDesignUC[, k] - oldMeans[k] |
| 424 |
} |
|
| 425 | ||
| 426 |
## so the linear predictor samples are |
|
| 427 | ! |
linPredSamples <- newDesignUC %*% simCoefs |
| 428 |
} |
|
| 429 | ||
| 430 | ! |
linPredSamples |
| 431 |
} else {
|
|
| 432 |
## no prediction required. |
|
| 433 |
## But we return an empty matrix because that is expected |
|
| 434 |
## by the GlmBayesMfpSamples class! |
|
| 435 | 41x |
matrix( |
| 436 | 41x |
nrow = 0L, |
| 437 | 41x |
ncol = 0L |
| 438 |
) |
|
| 439 |
} |
|
| 440 | ||
| 441 |
## process all coefficients samples: fixed, FP, UC in this order. |
|
| 442 | ||
| 443 |
## now we need the colnames of the design matrix: |
|
| 444 | 41x |
colNames <- colnames(attr(object, "data")$x) |
| 445 | ||
| 446 | ||
| 447 |
## invariant: already coefCounter coefficients samples (rows of simCoefs) processed |
|
| 448 | 41x |
coefCounter <- 0L |
| 449 | ||
| 450 |
## the fixed covariate (and intercept samples) |
|
| 451 | 41x |
fixCoefs <- list() |
| 452 | ||
| 453 | 41x |
if (doGlm) {
|
| 454 | 1x |
fixName <- attrs$termNames$fixed[1] |
| 455 | 1x |
mat <- simCoefs[1, , |
| 456 | 1x |
drop = FALSE |
| 457 |
] |
|
| 458 | 1x |
coefCounter <- coefCounter + 1 |
| 459 | ||
| 460 |
## and also get the names |
|
| 461 | 1x |
rownames(mat) <- colNames[1] |
| 462 | ||
| 463 |
## and this is located in the list |
|
| 464 | 1x |
fixCoefs[[fixName]] <- mat |
| 465 |
} |
|
| 466 | ||
| 467 |
## start processing all fixed terms |
|
| 468 | 41x |
for (i in seq_along(fixList <- attrs$indices$fixed)) {
|
| 469 |
## get the name of the fixed term |
|
| 470 | 360x |
fixName <- attrs$termNames$fixed[i + doGlm] |
| 471 | ||
| 472 |
## check if this fixed covariate is included in the model |
|
| 473 | 360x |
if (i %in% config$fixTerms) {
|
| 474 |
## then we get the corresponding samples |
|
| 475 | ! |
mat <- simCoefs[ |
| 476 | ! |
coefCounter + |
| 477 | ! |
seq_len(len <- length(fixList[[i]])), , |
| 478 | ! |
drop = FALSE |
| 479 |
] |
|
| 480 | ||
| 481 |
## correct invariant |
|
| 482 | ! |
coefCounter <- coefCounter + len |
| 483 | ||
| 484 |
## and also get the names |
|
| 485 | ! |
rownames(mat) <- colNames[fixList[[i]]] |
| 486 | ||
| 487 |
## and this is located in the list |
|
| 488 | ! |
fixCoefs[[fixName]] <- mat |
| 489 |
} |
|
| 490 |
} |
|
| 491 | ||
| 492 | ||
| 493 |
## samples of fractional polynomial function values evaluated at grids will |
|
| 494 |
## be elements of this list: |
|
| 495 | 41x |
bfpCurves <- list() |
| 496 |
## Note that only those bfp terms are included which also appear in the |
|
| 497 |
## model configuration. |
|
| 498 | ||
| 499 |
## start processing all FP terms |
|
| 500 | 41x |
for (i in seq_along(attrs$indices$bfp)) {
|
| 501 |
## what is the name of this FP term? |
|
| 502 | 15x |
fpName <- attrs$termNames$bfp[i] |
| 503 | ||
| 504 |
## the powers for this FP term |
|
| 505 | 15x |
p.i <- config$powers[[fpName]] |
| 506 | ||
| 507 |
## if there is at least one power for this FP, we add a list element, else not. |
|
| 508 | 15x |
if ((len <- length(p.i)) > 0) {
|
| 509 |
## determine additional grid values: |
|
| 510 | 6x |
obs <- attrs$data$x[, attrs$indices$bfp[i], drop = FALSE] |
| 511 | ||
| 512 |
## if there is no grid in gridList, we take an additional scaled grid using the gridSize argument |
|
| 513 | 6x |
if (is.null(g <- gridList[[fpName]])) {
|
| 514 | 6x |
g <- seq( |
| 515 | 6x |
from = min(obs), |
| 516 | 6x |
to = max(obs), |
| 517 | 6x |
length = gridSize |
| 518 |
) |
|
| 519 |
} |
|
| 520 | ||
| 521 |
## the resulting total grid is: |
|
| 522 | 6x |
g <- union(obs, g) |
| 523 | 6x |
gridSizeTotal <- length(g) |
| 524 | ||
| 525 |
## sort the grid |
|
| 526 | 6x |
g <- sort(g) |
| 527 | ||
| 528 |
## and rearrange as column with name (needed for getFpTransforms) |
|
| 529 | 6x |
g <- matrix(g, |
| 530 | 6x |
nrow = gridSizeTotal, |
| 531 | 6x |
ncol = 1L, |
| 532 | 6x |
dimnames = list(NULL, fpName) |
| 533 |
) |
|
| 534 | ||
| 535 |
## the part of the design matrix corresponding to the grid |
|
| 536 | 6x |
xMat <- getFpTransforms(g, p.i, center = TRUE) |
| 537 | ||
| 538 |
## multiply that with the corresponding coefficients to get the FP curve samples |
|
| 539 | 6x |
mat <- xMat %*% simCoefs[coefCounter + seq_len(len), , drop = FALSE] |
| 540 | ||
| 541 |
## correct invariant |
|
| 542 | 6x |
coefCounter <- coefCounter + len |
| 543 | ||
| 544 |
## save the grid as an attribute of the samples |
|
| 545 | 6x |
attr(mat, "scaledGrid") <- g |
| 546 | ||
| 547 |
## save position of observed values |
|
| 548 | 6x |
attr(mat, "whereObsVals") <- match(obs, g) |
| 549 | ||
| 550 |
## then write into list |
|
| 551 | 6x |
bfpCurves[[fpName]] <- mat |
| 552 |
} |
|
| 553 |
} |
|
| 554 | ||
| 555 |
## uncertain fixed form covariates coefficients samples |
|
| 556 |
## will be elements of this list: |
|
| 557 | 41x |
ucCoefs <- list() |
| 558 |
## Note that only those UC terms are included which also appear in the model |
|
| 559 |
## configuration. |
|
| 560 | ||
| 561 | ||
| 562 |
## start processing all UC terms |
|
| 563 | 41x |
for (i in seq_along(ucList <- attrs$indices$ucList)) {
|
| 564 |
## get the name of the UC term |
|
| 565 | 228x |
ucName <- attrs$termNames$uc[i] |
| 566 | ||
| 567 |
## check if this UC is included in the model |
|
| 568 | 228x |
if (i %in% config$ucTerms) {
|
| 569 |
## then we get the corresponding samples |
|
| 570 | 119x |
mat <- simCoefs[ |
| 571 | 119x |
coefCounter + |
| 572 | 119x |
seq_len(len <- length(ucList[[i]])), , |
| 573 | 119x |
drop = FALSE |
| 574 |
] |
|
| 575 | ||
| 576 |
## correct invariant |
|
| 577 | 119x |
coefCounter <- coefCounter + len |
| 578 | ||
| 579 |
## and also get the names |
|
| 580 | 119x |
rownames(mat) <- colNames[ucList[[i]]] |
| 581 | ||
| 582 |
## and this is located in the list |
|
| 583 | 119x |
ucCoefs[[ucName]] <- mat |
| 584 |
} |
|
| 585 |
} |
|
| 586 | ||
| 587 |
## collect processed samples into S4 class object |
|
| 588 | 41x |
results$samples <- new("GlmBayesMfpSamples",
|
| 589 | 41x |
fitted = fitted, |
| 590 | 41x |
predictions = predictions, |
| 591 | 41x |
fixCoefs = fixCoefs, |
| 592 | 41x |
z = cppResults$samples$z, |
| 593 | 41x |
bfpCurves = bfpCurves, |
| 594 | 41x |
ucCoefs = ucCoefs, |
| 595 | 41x |
shiftScaleMax = attrs$shiftScaleMax, |
| 596 | 41x |
nSamples = nSamples |
| 597 |
) |
|
| 598 | ||
| 599 |
## finished post-processing. |
|
| 600 | ||
| 601 |
## finally return the whole stuff. |
|
| 602 | 41x |
return(results) |
| 603 |
} |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Isaac Gravestock [isaac *.* gravestock *a*t* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Description: |
|
| 6 |
## Construct a Survival model formula based on a glmBfp object |
|
| 7 |
## |
|
| 8 |
## History: |
|
| 9 |
## 14/07/2015 Copied from CoxTBFs project functions-funsel-2.bma.R |
|
| 10 |
##################################################################################### |
|
| 11 |
NULL |
|
| 12 | ||
| 13 |
##' Construct a survival formula based on a glmBfp object with censInd not null. |
|
| 14 |
##' |
|
| 15 |
##' This is an internal function to construct a survival formula based on a glmBfp object |
|
| 16 |
##' with censInd not null. |
|
| 17 |
##' |
|
| 18 |
##' @param models.listpart the glmBfp object for which to construct the survival formula |
|
| 19 |
##' @param time.var the name of the time variable as character string |
|
| 20 |
##' @param status.var the name of the censoring indicator variable as character string |
|
| 21 |
##' @return The formula object based on the glmBfp object. |
|
| 22 |
##' |
|
| 23 |
##' @keywords internal utilities |
|
| 24 | ||
| 25 |
writeFormula <- function(models.listpart, time.var, status.var) {
    ## left-hand side: "survival::Surv(<time>, <status>) ~ "
    surv.part <- paste0("survival::Surv(", time.var, ", ", status.var, ") ~ ")

    ## extract indices and names of the fixed (always included, linear) terms
    fix.parts <- models.listpart[[1]]$configuration$fixTerms
    fix.names <- attributes(models.listpart)$termNames$fix

    ## extract indices and names of the uncertain fixed form (UC) terms
    uc.parts <- models.listpart[[1]]$configuration$ucTerms
    uc.names <- attributes(models.listpart)$termNames$uc

    ## pre-transformation shift (column 1) and scale (column 2) per FP term
    shift <- attributes(models.listpart)$shiftScaleMax[, 1]
    scale <- attributes(models.listpart)$shiftScaleMax[, 2]

    if (length(models.listpart[[1]]$configuration$powers) > 0) {
        ## extract powers for terms which should have FP transformations
        powers <- models.listpart[[1]]$configuration$powers
        bfp.trans <- character(0)
        for (i in seq_along(powers)) {
            this.term <- ""
            if (length(powers[[i]]) > 0) {
                ## build e.g. "fpTrans(x,c(1,2), <scale>, <shift>)"
                this.name <- names(powers[i])
                this.power <- paste0("c(", paste(powers[[i]], collapse = ","), ")")
                this.term <- paste0("fpTrans(", this.name, ",", this.power)

                ## NOTE(review): scale and shift are appended positionally, so
                ## if scale[i] == 0 while shift[i] != 0, the shift value would
                ## land in the scale argument position of fpTrans -- confirm
                ## that a zero scale can never co-occur with a nonzero shift.
                if (scale[i] != 0) {
                    this.term <- paste0(this.term, ", ", scale[i])
                }

                if (shift[i] != 0) {
                    this.term <- paste0(this.term, ", ", shift[i])
                }

                this.term <- paste0(this.term, ")")
            }
            if (this.term != "") bfp.trans <- c(bfp.trans, this.term)
        }
        ## combine UC, FP and fixed terms on the right-hand side
        v <- paste(c(uc.names[uc.parts], bfp.trans, fix.names[fix.parts]), collapse = " + ")
        return(formula(paste(surv.part, v)))
    }

    ## when there are no transformed variables
    v <- paste(c(uc.names[uc.parts], fix.names[fix.parts]), collapse = " + ")
    return(formula(paste(surv.part, v)))
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian fractional polynomials |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[testCox.R] by DSB Mit 03/07/2013 22:58 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## R code and interface to C++ code that could be used for the Cox model |
|
| 9 |
## computations, for regression testing purposes. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 29/11/2012 file creation |
|
| 13 |
## 03/12/2012 make it internal |
|
| 14 |
## 03/07/2013 add offsets |
|
| 15 |
##################################################################################### |
|
| 16 | ||
| 17 |
##' @include helpers.R |
|
| 18 |
NULL |
|
| 19 | ||
| 20 | ||
| 21 |
##' Test the Cox model computation for the TBF approach
##'
##' @param survTimes the survival times
##' @param censInd the logical censoring indicator (\code{TRUE} = observed,
##' \code{FALSE} = censored survival times)
##' @param offsets known component to be included in the linear predictor during
##' fitting. This must be a numeric vector of length equal to the number of
##' cases (if not provided, a vector of zeroes)
##' @param X the design matrix, *without* the intercept 1's!, with at least one
##' column
##' @param useCppCode use the C++ code? (not the default) otherwise the
##' R-function \code{\link{coxph}} is used
##' @return a list with the coefficient estimates (\code{betas}), the covariance
##' matrix estimate (\code{cov}) and the residual deviance (\code{deviance}).
##'
##' @keywords utilities internal
##' @importFrom survival coxph
##' @author Daniel Sabanes Bove
testCox <- function(survTimes,
                    censInd,
                    offsets = rep.int(0, length(survTimes)),
                    X,
                    useCppCode = FALSE) {
    ## checks
    stopifnot(
        is.numeric(survTimes),
        is.logical(censInd),
        is.matrix(X),
        identical(length(survTimes), length(censInd)),
        identical(length(survTimes), length(offsets)),
        identical(length(survTimes), nrow(X)),
        is.bool(useCppCode)
    )

    ## summaries
    nCovs <- ncol(X)

    ## check that we have at least one covariate
    ## (null model is not treated here)
    stopifnot(nCovs > 0)

    ## method for handling ties:
    method <- "efron"

    ## now do either simple R or go C++
    if (useCppCode) {
        ## sort according to survival times
        sorted <- order(survTimes)
        survTimes <- survTimes[sorted]
        censInd <- censInd[sorted]
        ## drop = FALSE keeps X a matrix also in the single-covariate case
        X <- X[sorted, , drop = FALSE]

        ## go C++ for fitting the Cox model
        storage.mode(X) <- "double"
        tmp <- cpp_coxfit(
            as.double(survTimes),
            as.integer(censInd),
            as.double(offsets),
            X,
            if (method == "efron") 1L else 0L
        )

        ## get results
        deviance <- -2 * (tmp$loglik[1] - tmp$loglik[2])
        covMat <- tmp$imat
        betas <- tmp$coef
    } else {
        ## fit the Cox model; qualify Surv explicitly, because only coxph is
        ## imported from the survival namespace
        tmp <- survival::coxph(survival::Surv(survTimes, censInd) ~ X + offset(offsets),
            ties = method
        )

        ## get results
        deviance <- -2 * (tmp$loglik[1] - tmp$loglik[2])
        covMat <- tmp$var
        betas <- tmp$coefficients
    }

    ## return the results
    return(list(
        betas = betas,
        cov = covMat,
        deviance = deviance
    ))
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[GlmBayesMfpSamples-methods.R] by DSB Mon 03/12/2012 18:18 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Additional convenience methods for GlmBayesMfpSamples class objects. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 03/08/2010 file creation with a subset method |
|
| 12 |
##################################################################################### |
|
| 13 | ||
| 14 |
##' Subset method for GlmBayesMfpSamples objects |
|
| 15 |
##' |
|
| 16 |
##' Index the samples to select a subset of samples. |
|
| 17 |
##' |
|
| 18 |
##' @name GlmBayesMfpSamples-subsetting |
|
| 19 |
##' @aliases [,GlmBayesMfpSamples,ANY,missing,missing-method |
|
| 20 |
##' |
|
| 21 |
##' @usage \S4method{[}{GlmBayesMfpSamples,ANY,missing,missing}(x, i)
|
|
| 22 |
##' @param x the \code{\linkS4class{GlmBayesMfpSamples}} object
|
|
| 23 |
##' @param i the vector defining the subset of samples |
|
| 24 |
##' @return The subset of the same class. |
|
| 25 |
##' @note The function call will fail if any of the saved bfpCurves or ucCoefs |
|
| 26 |
##' does not have enough samples to be subset by \code{i} !
|
|
| 27 |
##' |
|
| 28 |
##' @seealso \code{\linkS4class{GlmBayesMfpSamples}}
|
|
| 29 |
##' @keywords methods |
|
| 30 |
##' @exportMethod "[" |
|
| 31 |
setMethod("[",
    signature = signature(
        x = "GlmBayesMfpSamples",
        i = "ANY",
        j = "missing",
        drop = "missing"
    ),
    def = function(x, i) {
        ## subset the fitted values samples (columns are the samples)
        x@fitted <- x@fitted[, i, drop = FALSE]

        ## subset the predictions only if there are any
        if (length(x@predictions)) {
            x@predictions <- x@predictions[, i, drop = FALSE]
        }

        ## subset each fixed coefficients sample matrix
        fixCoefs <- x@fixCoefs
        for (p in names(fixCoefs)) {
            fixCoefs[[p]] <- fixCoefs[[p]][, i, drop = FALSE]
        }
        x@fixCoefs <- fixCoefs

        ## subset the z samples
        x@z <- x@z[i]

        ## subset the FP curve samples, preserving their attributes
        bfpCurves <- x@bfpCurves
        for (p in names(bfpCurves)) {
            ## save attributes (matrix subsetting would drop them)
            sg <- attr(bfpCurves[[p]], "scaledGrid")
            wov <- attr(bfpCurves[[p]], "whereObsVals")

            bfpCurves[[p]] <- bfpCurves[[p]][, i, drop = FALSE]

            ## get back the attributes
            attr(bfpCurves[[p]], "scaledGrid") <- sg
            attr(bfpCurves[[p]], "whereObsVals") <- wov
        }
        x@bfpCurves <- bfpCurves

        ## subset each UC coefficients sample matrix
        ucCoefs <- x@ucCoefs
        for (p in names(ucCoefs)) {
            ucCoefs[[p]] <- ucCoefs[[p]][, i, drop = FALSE]
        }
        x@ucCoefs <- ucCoefs

        ## update the number of samples: take it from the always-present
        ## fitted matrix. (The previous length(x@fixCoefs[[1]]) failed when
        ## no fixed coefficients were saved, and length() of a matrix is
        ## nCoefs * nSamples rather than the number of samples.)
        x@nSamples <- ncol(x@fitted)

        return(x)
    }
)
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs. |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[inclusionProbs.R] by DSB Die 01/12/2009 13:21 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Compute (model averaged) posterior inclusion probabilities for the uncertain |
|
| 9 |
## variables (including FP variables) based on a GlmBayesMfp object. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 01/12/2009 modify from the bfp package, and add roxygen tags. |
|
| 13 |
##################################################################################### |
|
| 14 | ||
| 15 |
##' Compute posterior inclusion probabilities based on GlmBayesMfp object |
|
| 16 |
##' |
|
| 17 |
##' Compute (model averaged) posterior inclusion probabilities for the uncertain |
|
| 18 |
##' variables (including FP variables) based on a GlmBayesMfp object. |
|
| 19 |
##' |
|
| 20 |
##' @param GlmBayesMfpObject the GlmBayesMfp object |
|
| 21 |
##' @param postProbs the vector of posterior model probabilities, defaults to |
|
| 22 |
##' the normalized probabilities in \code{GlmBayesMfpObject}
|
|
| 23 |
##' @return the resulting inclusion probabilities vector |
|
| 24 |
##' |
|
| 25 |
##' @export |
|
| 26 |
##' @keywords utilities |
|
| 27 |
inclusionProbs <- function(GlmBayesMfpObject,
                           postProbs =
                               posteriors(GlmBayesMfpObject,
                                   type = "normalized"
                               )) {
    ## normalize the posterior model probabilities to sum to one
    postProbs <- postProbs / sum(postProbs)
    inds <- attr(GlmBayesMfpObject, "indices")
    termNames <- attr(GlmBayesMfpObject, "termNames")

    ## result vector: one inclusion probability per FP and UC term
    nams <- unlist(termNames[c("bfp", "uc")])
    ret <- numeric(length(nams))
    names(ret) <- nams

    i <- 0

    ## FP terms: a term is included in a model iff it has at least one power.
    ## vapply instead of sapply guarantees a logical vector even for
    ## degenerate inputs.
    for (j in seq_along(inds$bfp)) {
        i <- i + 1
        present <- vapply(
            GlmBayesMfpObject,
            function(one) length(one$configuration$powers[[j]]) > 0,
            logical(1L)
        )
        ret[i] <- sum(present * postProbs)
    }

    ## UC terms: a term is included iff its index occurs in ucTerms
    for (j in seq_along(inds$ucList)) {
        i <- i + 1
        present <- vapply(
            GlmBayesMfpObject,
            function(one) any(j == one$configuration$ucTerms),
            logical(1L)
        )
        ret[i] <- sum(present * postProbs)
    }

    return(ret)
}
|
| 56 | ||
| 57 |
## **************************************************************************************************** |
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[getLogMargLikEstimate.R] by DSB Mon 05/07/2010 13:36 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Compute the Chib-Jeliazkov log marginal likelihood estimate from the MCMC output |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 18/02/2010 separate code file; |
|
| 12 |
## correct bug for computing Omega(0): head(index, -0L) always gives |
|
| 13 |
## a length zero vector instead of the whole "index"!!! |
|
| 14 |
##################################################################################### |
|
| 15 | ||
| 16 | ||
| 17 | ||
| 18 |
##' Compute the Chib-Jeliazkov log marginal likelihood estimate from the MCMC output |
|
| 19 |
##' |
|
| 20 |
##' @param numeratorTerms terms for the sum in the numerator of the posterior density ordinate |
|
| 21 |
##' estimate at the high density point |
|
| 22 |
##' @param denominatorTerms terms for the sum in the denominator of the posterior density ordinate |
|
| 23 |
##' estimate at the high density point |
|
| 24 |
##' @param highDensityPointLogUnPosterior log unnormalized posterior (i.e. likelihood times prior) |
|
| 25 |
##' of the high density point |
|
| 26 |
##' @param endLag up to which lag should the covariance incorporate the uncertainty? (default: 40) |
|
| 27 |
##' @return list with resulting \dQuote{estimate} and \dQuote{standardError}.
|
|
| 28 |
##' |
|
| 29 |
##' @keywords internal |
|
| 30 |
##' |
|
| 31 |
##' @importFrom utils head tail |
|
| 32 |
getLogMargLikEstimate <- function(numeratorTerms,
                                  denominatorTerms,
                                  highDensityPointLogUnPosterior,
                                  endLag = 40L) {
    ## note that here it is assumed that there are equally many numerator and
    ## denominator numbers, therefore
    stopifnot(
        identical(
            length(numeratorTerms),
            length(denominatorTerms)
        ),
        is.double(numeratorTerms),
        is.double(denominatorTerms),
        is.double(highDensityPointLogUnPosterior)
    )

    ## first combine each numerator term with corresponding denominator term:
    ## row 1 = numerator terms, row 2 = denominator terms,
    ## one column per MCMC iteration
    h <- rbind(
        numeratorTerms,
        denominatorTerms
    )

    ## the estimate for that is
    hHat <- rowMeans(h)

    ## so the estimate for the log posterior density ordinate is:
    logPosteriorDensityEstimate <- log(hHat[1]) - log(hHat[2])

    ## then compute the standard error for this estimate:
    ## center each row at its mean (length-2 hHat is recycled down columns)
    hDiffHat <- h - hHat

    ## B = number of MCMC iterations
    B <- ncol(hDiffHat)
    index <- seq_len(B)

    ## function to compute the autocovariance estimate for a given lag
    getOmega <- function(lag) {
        ## get index for head part of time series;
        ## lag == 0 must be special-cased because head(index, -0L) returns
        ## a length-zero vector instead of the whole index (see the bug
        ## note in the file history)
        headIndex <-
            if (identical(lag, 0L)) {
                index
            } else {
                head(index, -lag)
            }

        ## and for tail
        tailIndex <-
            if (identical(lag, 0L)) {
                index
            } else {
                tail(index, -lag)
            }

        ## now build up result: 2x2 autocovariance matrix at this lag
        ret <- matrix(
            data = 0,
            nrow = 2L,
            ncol = 2L
        )

        ## sum of outer products over the B - lag lagged pairs,
        ## each normalized by B
        for (l in seq_len(B - lag)) {
            ret <- ret + tcrossprod(
                hDiffHat[, tailIndex[l]],
                hDiffHat[, headIndex[l]]
            ) / B
        }
        return(ret)
    }

    ## start with Omega_0
    covMat <- getOmega(0L) / B

    ## then add other terms: triangularly down-weighted, symmetrized
    ## autocovariances up to lag endLag
    for (s in seq_len(endLag)) {
        thisOmega <- getOmega(s)
        covMat <- covMat + (1 - s / (endLag + 1)) * (thisOmega + t(thisOmega)) / B
    }

    ## now covMat is ready to get delta ruled:
    ## aHat is the gradient of log(h1) - log(h2) evaluated at hHat
    aHat <- c(1 / hHat[1], -1 / hHat[2])
    ## NOTE(review): the quadratic form yields a 1x1 matrix, not a plain
    ## scalar -- callers should not rely on it being a bare numeric
    se <- sqrt(t(aHat) %*% covMat %*% aHat)

    ## return a list with the log marg lik estimate and the standard error
    return(list(
        estimate = highDensityPointLogUnPosterior - logPosteriorDensityEstimate,
        standardError = se
    ))
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[fpScale.R] by DSB Die 16/02/2010 13:12 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Scale an FP term variable appropriately before model search using "fpScale". |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 04/07/2008 copy from thesis function collection. |
|
| 12 |
## 18/11/2009 document with Roxygen |
|
| 13 |
##################################################################################### |
|
| 14 | ||
| 15 |
##' Shift and scale a covariate vector (if wished) to have positive and small numbers. |
|
| 16 |
##' |
|
| 17 |
##' This function is (almost exactly) copied from package \dQuote{mfp} to be consistent.
|
|
| 18 |
##' |
|
| 19 |
##' @param x the covariate vector |
|
| 20 |
##' @param scaling should shifting and scaling be performed (default)? |
|
| 21 |
##' |
|
| 22 |
##' @return list of \sQuote{shift} and \sQuote{scale} parameters appropriate for this
|
|
| 23 |
##' covariate vector. |
|
| 24 |
##' |
|
| 25 |
##' @keywords internal |
|
| 26 |
fpScale <- function(x, scaling = TRUE) {
    ## defaults correspond to "no shift, no scale"
    shiftVal <- 0
    scaleVal <- 1

    ## check scaling argument
    stopifnot(is.bool(scaling))

    if (isTRUE(scaling)) {
        if (min(x) <= 0) {
            ## smallest positive gap between sorted values, used to move all
            ## values strictly above zero; rounded up to one decimal place
            gaps <- diff(sort(x))
            shiftVal <- min(gaps[gaps > 0]) - min(x)
            shiftVal <- ceiling(shiftVal * 10) / 10
        }
        ## scale by the order of magnitude of the shifted mean
        shiftedMean <- mean(x + shiftVal)
        scaleVal <- 10^(sign(log10(shiftedMean)) * round(abs(log10(shiftedMean))))
    }
    return(list(shift = shiftVal, scale = scaleVal))
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian fractional polynomials for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[optimize.R] by DSB Die 25/05/2010 16:20 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## R code and interface to C++ code that could be used for the marginal likelihood |
|
| 9 |
## approximation, for regression testing purposes. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 08/12/2009 file creation |
|
| 13 |
## 09/12/2009 add a wrapper which uses both cppOptimize and cppScalarBfgs |
|
| 14 |
## to reduce the number of iterations and get a good hessian at the |
|
| 15 |
## minimum |
|
| 16 |
## 15/02/2010 Note that optimBfgs does not work well! |
|
| 17 |
## 25/05/2010 remove the function which uses both optimize and bfgs; |
|
| 18 |
## move the bfgs interface to this file and rename it to "cppBfgs"; |
|
| 19 |
## add roxygen doc |
|
| 20 |
##################################################################################### |
|
| 21 | ||
| 22 | ||
| 23 |
##' Interface to the internal C++ optimization routine "bfgs" |
|
| 24 |
##' |
|
| 25 |
##' @param x0 the start value |
|
| 26 |
##' @param f_ the target function |
|
| 27 |
##' @param min.x minimum bound on x (default \code{-Inf})
|
|
| 28 |
##' @param max.x maximum bound on x (default \code{+Inf})
|
|
| 29 |
##' @param prec precision (default \code{1e-5})
|
|
| 30 |
##' @param verbose be verbose? (not default) |
|
| 31 |
##' @return A list with the following elements: |
|
| 32 |
##' \describe{
|
|
| 33 |
##' \item{par}{the minimum abscissa found by the algorithm}
|
|
| 34 |
##' \item{inv.hessian}{the inverse Hessian at \code{par}}
|
|
| 35 |
##' \item{evaluations}{list of the function evaluation pairs: \code{args} and
|
|
| 36 |
##' \code{vals}}
|
|
| 37 |
##' \item{code}{the convergence code. 0 is \dQuote{OK}, -1 is \dQuote{lost
|
|
| 38 |
##' precision}, and +1 is \dQuote{change not large enough}}
|
|
| 39 |
##' } |
|
| 40 |
##' |
|
| 41 |
##' @keywords utilities internal |
|
| 42 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 43 |
cppBfgs <- function(x0,
                    f_,
                    min.x = c(-Inf),
                    max.x = c(+Inf),
                    prec = 1e-5,
                    verbose = FALSE) {
    ## coerce the numeric arguments and delegate to the C++ BFGS routine
    result <- cpp_bfgs(
        as.double(x0),
        f_,
        as.double(min.x),
        as.double(max.x),
        as.double(prec),
        verbose
    )
    return(result)
}
|
| 58 | ||
| 59 | ||
| 60 | ||
| 61 |
##' Interface to the internal C++ optimization routine "optimize" |
|
| 62 |
##' |
|
| 63 |
##' @param f_ the function |
|
| 64 |
##' @param min.x minimum bound on x |
|
| 65 |
##' @param max.x maximum bound on x |
|
| 66 |
##' @param prec precision (same default as for the original optimize function) |
|
| 67 |
##' @return A list with the following elements: |
|
| 68 |
##' \describe{
|
|
| 69 |
##' \item{par}{the minimum abscissa found by the algorithm}
|
|
| 70 |
##' \item{inv.hessian}{the inverse Hessian at \code{par}}
|
|
| 71 |
##' \item{evaluations}{list of the function evaluation pairs: \code{args} and
|
|
| 72 |
##' \code{vals}}
|
|
| 73 |
##' } |
|
| 74 |
##' |
|
| 75 |
##' @keywords utilities internal |
|
| 76 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 77 |
cppOptimize <- function(f_,
                        min.x,
                        max.x,
                        prec = .Machine$double.eps^0.25) {
    ## coerce the bounds and precision, then delegate to the C++ routine
    result <- cpp_optimize(
        f_,
        as.double(min.x),
        as.double(max.x),
        as.double(prec)
    )
    return(result)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[computeModels.R] by DSB Don 08/04/2010 13:48 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Compute model information (including the log marginal likelihood) |
|
| 9 |
## for a given list of model configurations and glmBayesMfp output. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 16/02/2010 file creation |
|
| 13 |
## 08/04/2010 attribute writing works as intended. |
|
| 14 |
##################################################################################### |
|
| 15 | ||
| 16 |
##' @include helpers.R |
|
| 17 |
NULL |
|
| 18 | ||
| 19 |
##' Compute model information for a given list of model configurations and glmBayesMfp output. |
|
| 20 |
##' |
|
| 21 |
##' If we want to compute the marginal likelihood and information necessary for |
|
| 22 |
##' generating posterior samples for new models not encountered in the model search |
|
| 23 |
##' done by \code{\link{glmBayesMfp}}, this function can be used: Provide it with the
|
|
| 24 |
##' models \code{configurations} to be interpreted in the context of the \code{object}
|
|
| 25 |
##' of class \code{\link{GlmBayesMfp}}. The result is again of the latter class, but contains
|
|
| 26 |
##' only the new models (similarly as the whole model space would consist of these and an |
|
| 27 |
##' exhaustive search would have been conducted). |
|
| 28 |
##' |
|
| 29 |
##' @param configurations list of the model configurations |
|
| 30 |
##' @param object the \code{\link{GlmBayesMfp}} object
|
|
| 31 |
##' @param verbose be verbose? (default: only for more than 100 configurations) |
|
| 32 |
##' @param debug be even more verbose and echo debug-level information? (not by default) |
|
| 33 |
##' @return The \code{\link{GlmBayesMfp}} object with the new models. This can directly
|
|
| 34 |
##' be used as input for \code{\link{sampleGlm}}.
|
|
| 35 |
##' |
|
| 36 |
##' @export |
|
| 37 |
##' @keywords models regression |
|
| 38 |
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
|
|
| 39 |
computeModels <- function(configurations,
                          object,
                          verbose = length(configurations) > 100L,
                          debug = FALSE) {
    ## checks
    stopifnot(
        is.list(configurations),
        inherits(object, "GlmBayesMfp"),
        is.bool(verbose),
        is.bool(debug)
    )

    ## start from all attributes of the original object
    oldAttrs <- attributes(object)

    ## pass the verbosity switches along to the C++ side
    oldAttrs$options$verbose <- verbose
    oldAttrs$options$debug <- debug

    ## evaluate exactly the given model configurations: this is handled as
    ## an "exhaustive" search over the fixed configuration list (no
    ## sampling), so the C++ function glmExhaustive takes care of it
    oldAttrs$searchConfig$doSampling <- FALSE
    oldAttrs$searchConfig$modelConfigs <- configurations

    ## then go C++
    result <- cpp_glmBayesMfp(
        oldAttrs$data,
        oldAttrs$fpInfos,
        oldAttrs$ucInfos,
        oldAttrs$fixInfos,
        oldAttrs$searchConfig,
        oldAttrs$distribution,
        oldAttrs$options
    )

    ## C++ attaches the attributes numVisited, inclusionProbs and
    ## logNormConst to the result; copy them into the attribute template.
    ## The empty-index assignment keeps names/dims of the old
    ## inclusionProbs.
    oldAttrs$numVisited <- attr(result, "numVisited")
    oldAttrs$inclusionProbs[] <- attr(result, "inclusionProbs")
    oldAttrs$logNormConst <- attr(result, "logNormConst")
    oldAttrs$names <- seq_along(result)

    ## replace all attributes at once, which saves much paperwork
    attributes(result) <- oldAttrs

    ## finally return the results
    return(result)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[formula.R] by DSB Fre 15/06/2012 16:04 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Helper functions for formula construction. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 18/11/2009 file creation: extra file for this seems to be better. |
|
| 12 |
## Try to get one Rd file with rdname roclets (see make.Rd2.roclet documentation |
|
| 13 |
## in roxygen package) |
|
| 14 |
## 15/06/2012 remove x documentation from uc so that it is not duplicate |
|
| 15 |
##################################################################################### |
|
| 16 | ||
| 17 |
##' Mark a covariate for transformation with fractional polynomials
##'
##' Using this function, you can mark a covariate for transformation with
##' fractional polynomials in the formula for \code{\link{glmBayesMfp}}.
##'
##' @param x the covariate name
##' @param max maximum degree for this FP
##' @param scale use pre-transformation shifting and scaling to avoid numerical problems?
##' @param rangeVals extra numbers if the shifting and scaling should consider values in this range
##'
##' @return name of the provided covariate, with the other function parameters as attached attributes
##'
##' @keywords utilities
##' @export
##' @rdname formula
bfp <- function(x,
                max = 2,
                scale = TRUE,
                rangeVals = NULL) {
  ## capture the unevaluated covariate name as a string
  ## (non-standard evaluation: x is never evaluated)
  term <- deparse(substitute(x))

  ## attach the FP settings as attributes of the returned name
  attr(term, "max") <- max
  attr(term, "scale") <- scale
  attr(term, "rangeVals") <- rangeVals

  term
}
|
| 42 | ||
| 43 |
## **************************************************************************************************** |
|
| 44 | ||
| 45 |
##' Mark a covariate expression for joint variable selection
##'
##' Using this function, you can mark a covariate or a group of combined
##' covariates for joint variable selection (\dQuote{uncertain covariate fixed
##' form covariate groups}) in the formula for \code{\link{glmBayesMfp}}.
##'
##' @return Just the name of the provided covariate
##'
##' @keywords utilities
##' @export
##' @rdname formula
`uc` <- function(x) {
  ## capture and return the unevaluated expression as a string
  deparse(substitute(x))
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs. |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[constructNewdataMatrix.R] by DSB Mit 06/01/2010 13:05 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Internal function to construct the covariates matrix for new data based on the formula |
|
| 9 |
## and scaling info in an existing GlmBayesMfp object, for passing it to prediction functions. |
|
| 10 |
## |
|
| 11 |
## History: |
|
| 12 |
## 10/12/2009 copy and modify from bfp package |
|
| 13 |
##################################################################################### |
|
| 14 | ||
| 15 |
##' Construct the covariates matrix for new data based on an existing GlmBayesMfp object
##'
##' This is an internal function, which constructs a model matrix for new covariate
##' data, based on the formula and scaling info in an existing GlmBayesMfp object.
##' The matrix can then be passed to prediction functions.
##'
##' @param object a valid \code{\link{GlmBayesMfp}}-Object
##' @param newdata the new covariate data as a data.frame (with the same
##' covariate names as in the call to \code{\link{glmBayesMfp}})
##' @return The (uncentered!) model matrix with the FP columns shifted and scaled as for the
##' original data.
##'
##' @keywords internal utilities
constructNewdataMatrix <- function(object,
                                   newdata) {
  ## check class
  stopifnot(inherits(object, "GlmBayesMfp"))

  ## extract the covariates-only formula ([-2] drops the response part)
  ## and the names of the FP terms
  covariatesOnlyFormula <- attr(object, "formula")[-2]
  bfpTermNames <- attr(object, "termNames")$bfp

  ## get model matrix with the design variables for the new data
  newTerms <- terms(covariatesOnlyFormula, data = newdata)
  ret <- model.matrix(newTerms, data = newdata)

  ## the transformation parameters: a matrix with one row per FP term
  ## and columns "shift" and "scale"
  tr <- attr(object, "shiftScaleMax")

  ## scale the new FP columns like for the old data.
  ## NOTE: loop variable renamed from "bfp" — it used to shadow the
  ## exported bfp() function from formula.R.
  for (fpName in bfpTermNames) {
    transformed <- (ret[, fpName] + tr[fpName, "shift"]) / tr[fpName, "scale"]

    ## all transformed numbers must be positive (FP transforms take
    ## powers/logs); validate before writing the column back
    if (any(transformed <= 0)) {
      stop(simpleError(paste("New data for covariate", fpName, "is out of range.")))
    }

    ret[, fpName] <- transformed
  }

  ## return the correctly transformed model matrix
  return(ret)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[evalZdensity.R] by DSB Fre 29/07/2011 15:13 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Evaluate the unnormalized marginal z density in a given model at a number of |
|
| 9 |
## z values. This is mainly an exploratory function to test new optimization / |
|
| 10 |
## integration strategies via the R interface. |
|
| 11 |
## |
|
| 12 |
## History: |
|
| 13 |
## 16/05/2010 file creation: modified from the starting point sampleGlm.R |
|
| 14 |
## 18/05/2010 modify the interface: now we accept one single model configuration |
|
| 15 |
## list, and a GlmBayesMfp object in which it is interpreted. |
|
| 16 |
## This is better since we do not depend on an already fitted |
|
| 17 |
## model. |
|
| 18 |
## 08/07/2010 add an option to get the *conditional* marginal density |
|
| 19 |
## f(y | z) instead of the usual f(y, z) which is interpreted |
|
| 20 |
## as an unnormalized f(z | y). |
|
| 21 |
## 29/07/2010 add the new option to get a better Laplace approximation in the |
|
| 22 |
## case of binary logistic regression. |
|
| 23 |
## 29/07/2011 now "higherOrderCorrection" |
|
| 24 |
##################################################################################### |
|
| 25 | ||
| 26 |
##' @include helpers.R |
|
| 27 |
NULL |
|
| 28 | ||
| 29 |
##' Evaluate the (negative log) unnormalized marginal z density in a given
##' model.
##'
##' Based on the result list from \code{\link{glmBayesMfp}}, for the first model
##' in the list, the marginal z density can be evaluated.
##'
##' @param config the configuration of a single \code{GlmBayesMfp} model. The
##' null model is not allowed. It is interpreted in the context of
##' \code{object}.
##' @param object the \code{\link{GlmBayesMfp}} object
##' @param zValues the z values
##' @param conditional return the approximate *conditional* density f(y | z)?
##' (not default)
##' @param debug print debugging information? (not default)
##' @param higherOrderCorrection should a higher-order correction of the
##' Laplace approximation be used? (not default)
##'
##' @return the negative log marginal unnormalized density values at the
##' \code{zValues}. (Note the words \dQuote{negative}, \dQuote{log}, and
##' \dQuote{unnormalized} !!!)
##'
##' @keywords internal
##' @export
evalZdensity <-
  function(config,
           object,
           zValues,
           conditional = FALSE,
           debug = FALSE,
           higherOrderCorrection = FALSE) {
    ## class check comes first so its specific message is preserved
    if (!inherits(object, "GlmBayesMfp")) {
      stop(simpleError("object must be of class GlmBayesMfp"))
    }

    ## remaining argument checks
    stopifnot(
      is.list(config),
      is.numeric(zValues),
      is.bool(conditional),
      is.bool(debug),
      is.bool(higherOrderCorrection)
    )

    ## the attributes of the object carry data and prior/model infos
    attrs <- attributes(object)

    ## the null model (empty configuration) has no z parameter
    nCovariates <- length(unlist(config))
    if (identical(nCovariates, 0L)) {
      stop(simpleError("the null model has no z which could be sampled..."))
    }

    ## collect the evaluation options for the C++ side
    options <- list(
      zValues = as.double(zValues),
      conditional = conditional,
      debug = debug,
      higherOrderCorrection = higherOrderCorrection
    )

    ## delegate the actual density evaluation to C++
    results <- cpp_evalZdensity(
      config,
      attrs$data,
      attrs$fpInfos,
      attrs$ucInfos,
      attrs$fixInfos,
      attrs$distribution,
      options
    )

    return(results)
  }
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* gmx *.* net] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[McmcOptions-methods.R] by DSB Die 25/05/2010 18:01 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Functions/methods for the MCMC formal class. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 10/12/2009 copy from the master package |
|
| 12 |
##################################################################################### |
|
| 13 | ||
| 14 |
##' Compute the number of samples for a given MCMC options triple
##'
##' @param mcmcOptions the \code{\linkS4class{McmcOptions}} object
##' @return the resulting sample size
##'
##' @keywords programming
##' @export
sampleSize <-
  function(mcmcOptions) {
    stopifnot(is(mcmcOptions, "McmcOptions"))

    ## iterations kept after discarding the burn-in, thinned by step
    keptIterations <- mcmcOptions@iterations - mcmcOptions@burnin
    return(as.integer(ceiling(keptIterations / mcmcOptions@step)))
  }
|
| 27 | ||
| 28 | ||
| 29 |
##' Convert samples to mcmc objects
##'
##' @param samples samples matrix or vector
##' @param mcmcOptions the \code{\linkS4class{McmcOptions}} object which was chosen for the
##' production of \code{samples}
##' @return an S3 class \dQuote{mcmc} object
##'
##' @importFrom coda mcmc
##' @keywords programming
##' @export
convert2Mcmc <- function(samples,
                         mcmcOptions) {
  stopifnot(is(mcmcOptions, "McmcOptions"))

  ## the coda time attributes mirror the MCMC options:
  ## first saved iteration, last iteration, and the thinning interval
  firstSaved <- mcmcOptions@burnin + 1L
  return(coda::mcmc(
    data = samples,
    start = firstSaved,
    end = mcmcOptions@iterations,
    thin = mcmcOptions@step
  ))
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanes Bove [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[posteriors.R] by DSB Die 25/05/2010 15:11 (CEST)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Extract posterior model probability estimates from a GlmBayesMfp object. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 01/12/2009 modify from bfp package, and add roxygen tags; |
|
| 12 |
## add logmarglik and logprior extractor functions |
|
| 13 |
##################################################################################### |
|
| 14 | ||
| 15 |
##' Extract posterior model probability estimates from a GlmBayesMfp object
##'
##' @param GlmBayesMfpObject the object
##' @param type type of posterior model probability estimates to be extracted from
##' \code{GlmBayesMfpObject}
##' @return the requested probs from all models
##'
##' @export
##' @keywords utilities
posteriors <- function(GlmBayesMfpObject,
                       type = c("normalized", "sampling")) {
  ## resolve (possibly abbreviated) type argument
  type <- match.arg(type)

  ## position in the per-model posterior vector:
  ## 1 = normalized estimate, 2 = sampling estimate
  ind <- if (identical(type, "normalized")) 1L else 2L

  ## sanity check: the first model must actually carry this estimate
  firstPosterior <- GlmBayesMfpObject[[1]]$information$posterior
  stopifnot(!is.na(firstPosterior[ind]))

  ## pull the requested probability out of every model
  sapply(GlmBayesMfpObject, function(model) model$information$posterior[ind])
}
|
| 42 | ||
| 43 |
## **************************************************************************************************** |
|
| 44 | ||
| 45 |
##' Extract the log marginal likelihood estimates from a GlmBayesMfp object
##'
##' @param GlmBayesMfpObject the object
##' @return the vector of log marginal likelihood estimates
##'
##' @export
##' @keywords utilities
logMargLiks <- function(GlmBayesMfpObject) {
  ## one log marginal likelihood estimate per model
  extractOne <- function(model) model$information$logMargLik
  sapply(GlmBayesMfpObject, extractOne)
}
|
| 58 | ||
| 59 |
## **************************************************************************************************** |
|
| 60 | ||
| 61 |
##' Extract the log prior values from a GlmBayesMfp object
##'
##' @param GlmBayesMfpObject the object
##' @return the vector of log prior values
##'
##' @export
##' @keywords utilities
logPriors <- function(GlmBayesMfpObject) {
  ## one log prior value per model
  extractOne <- function(model) model$information$logPrior
  sapply(GlmBayesMfpObject, extractOne)
}
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* gmx *.* net] |
|
| 3 |
## Project: Bayesian FPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[McmcOptions-class.R] by DSB Mit 15/12/2010 13:57 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Encapsulate the three canonical MCMC options in a formal class. |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 10/12/2009 copy from the master package |
|
| 12 |
## 25/05/2010 include the optional argument "samples" in the constructor |
|
| 13 |
## 08/07/2010 coerce default iterations to integer in constructor |
|
| 14 |
## 13/12/2010 "samples" must have default value in the ctr |
|
| 15 |
##################################################################################### |
|
| 16 | ||
| 17 |
##' Class for the three canonical MCMC options
##'
##' The slots are:
##' \describe{
##' \item{iterations}{number of MCMC iterations}
##' \item{burnin}{number of burn-in iterations which are not saved}
##' \item{step}{only every step-th iteration is saved after the burn-in}
##' }
##'
##' @name McmcOptions-class
##' @keywords classes internal
setClass(
  Class = "McmcOptions",
  representation =
    representation(
      iterations = "integer",
      burnin = "integer",
      step = "integer"
    ),
  validity = function(object) {
    ## each guard returns the error string directly; falling through
    ## all guards returns NULL implicitly, which counts as valid
    if (object@burnin < 0L) {
      return("Burn-in must be non-negative")
    }
    if (object@burnin >= object@iterations) {
      return("Burn-in must be smaller than iterations")
    }
    if (object@step < 1) {
      return("Step size must be at least 1")
    }
  }
)
|
| 46 | ||
| 47 |
##' Constructor for class McmcOptions
##'
##' Note that the argument \code{samples} is included for convenience only -
##' you can specify it instead of \code{iterations}.
##'
##' @param iterations number of MCMC iterations (default: \code{110,000})
##' @param burnin number of burn-in iterations which are not saved (default:
##' \code{10,000})
##' @param step only every step-th iteration is saved after the burn-in
##' (default: \code{10})
##' @param samples number of resulting samples (by default \code{10,000} will result)
##' @return the freshly built object of class \code{\linkS4class{McmcOptions}}
##'
##' @keywords classes
##' @export
McmcOptions <-
  function(iterations = as.integer(burnin + (step * samples)),
           burnin = 1e4L,
           step = 10L,
           samples = 1e4L) {
    ## samples is only used through the lazy default of iterations:
    ## iterations = burnin + step * samples
    new(
      Class = "McmcOptions",
      iterations = iterations,
      burnin = burnin,
      step = step
    )
  }
| 1 |
##################################################################################### |
|
| 2 |
## Author: Daniel Sabanés Bové [daniel *.* sabanesbove *a*t* ifspm *.* uzh *.* ch] |
|
| 3 |
## Project: BFPs for GLMs |
|
| 4 |
## |
|
| 5 |
## Time-stamp: <[helpers.R] by DSB Die 16/02/2010 13:10 (CET)> |
|
| 6 |
## |
|
| 7 |
## Description: |
|
| 8 |
## Helper functions |
|
| 9 |
## |
|
| 10 |
## History: |
|
| 11 |
## 16/02/2010 file creation |
|
| 12 |
##################################################################################### |
|
| 13 | ||
| 14 |
##' Predicate checking for a boolean option
##'
##' @param x the object being checked
##' @return Returns \code{TRUE} if \code{x} is a length one logical vector (i.e., a
##' scalar)
##'
##' @keywords internal
##' @author Daniel Sabanes Bove \email{daniel.sabanesbove@@ifspm.uzh.ch}
is.bool <- function(x) {
  ## a scalar flag: logical type and exactly one element
  is.logical(x) && identical(length(x), 1L)
}
| 1 |
/* |
|
| 2 |
* bfgs.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 11.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#include "bfgs.h" |
|
| 9 |
#include "rcppExport.h" |
|
| 10 |
#include "functionWraps.h" |
|
| 11 | ||
| 12 |
using namespace Rcpp; |
|
| 13 | ||
| 14 |
// ***************************************************************************************************// |
|
| 15 | ||
| 16 | ||
| 17 | ||
| 18 |
// just an R interface to bfgs, for regression testing purposes.
//
// Arguments (all SEXP, coerced below via Rf_as*):
// - r_startval:  scalar start value x0 for the minimization
// - r_function:  R function to be minimized (wrapped and cached below)
// - r_minx, r_maxx: box constraints on the argument
// - r_precision: flatness tolerance for declaring convergence
// - r_verbose:   logical; echo progress via Rprintf inside Bfgs
//
// Returns an R list with:
// - par:         the located minimizer
// - inv.hessian: the (scalar) inverse Hessian approximation at the minimizer
// - evaluations: all cached function evaluations (from CachedFunction)
// - code:        Bfgs::minimize convergence code (0 ok, -1 lost precision,
//                1 change not large enough)
// [[Rcpp::export]]
SEXP
cpp_bfgs(SEXP r_startval, SEXP r_function, SEXP r_minx, SEXP r_maxx, SEXP r_precision, SEXP r_verbose )
{
    // (historical note: a commented-out block here used to unpack these
    // arguments manually from a .Call pairlist via CDR/CAR; Rcpp attributes
    // now do that marshalling automatically.)

    // wrap the R function to a cached function, so repeated evaluations at
    // the same point are served from the cache and can be reported back
    RFunction fun(r_function);
    CachedFunction<RFunction> cachedFun(fun);

    // then minimize (with constraints)
    Bfgs<CachedFunction<RFunction> > bfgs(cachedFun,
                                          Rf_asLogical(r_verbose) == 1,
                                          Rf_asReal(r_minx),
                                          Rf_asReal(r_maxx),
                                          Rf_asReal(r_precision));

    // output parameters filled in by minimize()
    double xMin = 0.0;
    double invHessMin = 0.0;

    int code = bfgs.minimize(Rf_asReal(r_startval),
                             xMin,
                             invHessMin);

    // pack results into R list
    return List::create(_["par"] = xMin,
                        _["inv.hessian"] = invHessMin,
                        _["evaluations"] = cachedFun.getCache().convert2list(),
                        _["code"] = code);
}
|
| 68 | ||
| 69 |
// ***************************************************************************************************// |
|
| 70 | ||
| 71 |
// End of file. |
|
| 72 |
| 1 |
/* |
|
| 2 |
* bfgs.h |
|
| 3 |
* |
|
| 4 |
* Created on: 13.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef BFGS_H_ |
|
| 9 |
#define BFGS_H_ |
|
| 10 | ||
| 11 |
#include "functionWraps.h" |
|
| 12 | ||
| 13 |
// ***************************************************************************************************// |
|
| 14 | ||
| 15 | ||
| 16 |
// the main Bfgs class
//
// One-dimensional BFGS-style minimizer with optional box constraints.
// The template parameter Fun is any callable double -> double; its
// derivative is obtained numerically via NumericDerivative<Fun>
// (declared in functionWraps.h — assumed to be a finite-difference
// wrapper; confirm there).
template<class Fun>
class Bfgs {
public:
    // setup the object
    // - function is the function to be maximized (possibly constrained)
    // - verbose: print progress on R screen?
    // - lowerBound: lower bound on the function argument (default: -Inf)
    // - upperBound: upper bound on the function argument (default: +Inf)
    // - precision is precision parameter for the flatness at the maximum.
    // - ftol and gtol are convergence parameters
    // - stepsize is a parameter for the first ("crude") step of the algorithm
    Bfgs(Fun& function,
         bool verbose=false,
         double lowerBound=R_NegInf,
         double upperBound=R_PosInf,
         double precision=0.00001,
         double ftol=0.0001,
         double gtol=0.9,
         double stepsize=3.0) :
             function(function),
             functionDeriv(function),
             verbose(verbose),
             lowerBound(lowerBound),
             upperBound(upperBound),
             precision(precision),
             ftol(ftol),
             gtol(gtol),
             stepsize(stepsize)
    {
    }

    // minimize the function and compute the minimum and the inverted hessian at the minimum
    // (defined out-of-line below; returns 0 ok, -1 lost precision, 1 change too small)
    int
    minimize (double x0, double& xMin, double& invHessMin);

private:
    // the function we want to maximize
    Fun& function;

    // and its numerical derivative
    NumericDerivative<Fun> functionDeriv;

    // echo progress?
    const bool verbose;

    // constraints on the function argument
    const double lowerBound;
    const double upperBound;

    // settings
    const double precision;
    const double ftol;
    const double gtol;
    const double stepsize;


    // internal linesearch class:
    // (benefit is encapsulation and access to parent class members,
    // here function, functionDeriv, and settings)
    class Linesearch {
    public:
        Linesearch(const Bfgs& bfgs, // reference to instance of enclosing class must be passed!!
                   // there is no automatic connection of nested class instances to the enclosing class instance.
                   double x,
                   double direction) :
                       bfgs(bfgs),
                       x(x),
                       direction(direction),
                       // largest feasible step: the alpha at which
                       // x + alpha * direction hits the active bound
                       maxAlpha((direction < 0) ?
                                ((bfgs.lowerBound - x) / direction) :
                                ((bfgs.upperBound - x) / direction))
        {
        }

        // do the linesearch for a given start point alpha1
        double
        operator()(double alpha1) const;

    private:

        // reference to the enclosing object
        const Bfgs& bfgs;

        // the point x and the direction of the search
        const double x;
        const double direction;

        // the maximum alpha to be chosen: set by the constructor.
        const double maxAlpha;

        // objective and derivative of objective function are member functions:
        // phi(alpha) = f(x + alpha * direction)
        double
        phi(double alpha) const
        {
            return bfgs.function(x + alpha * direction);
        }

        // phi'(alpha) = f'(x + alpha * direction) * direction (chain rule)
        double
        phi_(double alpha) const
        {
            return bfgs.functionDeriv(x + alpha * direction) * direction;
        }

        // other helper functions:
        // Armijo ("sufficient decrease") condition, controlled by ftol
        bool
        armijo(double alpha) const
        {
            return phi(alpha) <= (phi(0.0) + bfgs.ftol * alpha * phi_(0.0));
        }

        // curvature condition (second Wolfe condition), controlled by gtol
        bool
        curvature(double alpha) const
        {
            return fabs(phi_(alpha)) <= - bfgs.gtol * phi_(0.0);
        }

        // quadratic interpolation helper used by zoom (defined out-of-line)
        double
        interpolate(double x0,
                    double x1,
                    double f0,
                    double f1,
                    double g0) const;

        // refine an interval bracketing a Wolfe point (defined out-of-line)
        double
        zoom(double alpha_lo,
             double alpha_hi) const;
    };
};
|
| 145 | ||
| 146 | ||
| 147 |
// ***************************************************************************************************// |
|
| 148 | ||
| 149 |
// zoom: refine the interval [alpha_lo, alpha_hi] (which brackets a point
// satisfying the Wolfe conditions) until such an alpha is found, or give
// up after 30 interpolation steps. This is the "zoom" phase of a Wolfe
// linesearch (cf. Nocedal & Wright 2006, Algorithm 3.6).
// Invariant: alpha_lo is the best (lowest phi) Wolfe-Armijo-feasible end.
// Returns 0.0 on failure, which the caller treats as "lost precision".
template <class Fun>
double
Bfgs<Fun>::Linesearch::zoom(double alpha_lo,
                            double alpha_hi) const
{
    for (int i = 0; i < 30; ++i)
    {
        // trial point from quadratic interpolation on the current interval
        double alpha = interpolate(alpha_lo, alpha_hi, phi(alpha_lo), phi(alpha_hi), phi_(alpha_lo));

        if ((! armijo(alpha)) || (phi(alpha) >= phi(alpha_lo)))
        {
            // trial violates sufficient decrease (or is worse than the
            // current best): shrink the interval from the high side
            alpha_hi = alpha;
        }
        else
        {
            if (curvature(alpha))
            {
                // both Wolfe conditions hold: done
                return alpha;
            }

            // started going uphill?
            if (phi_(alpha) * (alpha_hi - alpha_lo) >= 0.0)
            {
                alpha_hi = alpha_lo;
            }
            alpha_lo = alpha;
        }
    }
    return 0.0; // not enough progress; give up.
}
|
| 179 | ||
| 180 | ||
| 181 |
// Implements the More-Thuente linesearch algorithm. The notation is taken from |
|
| 182 |
// Nocedal and Wright (2006). This function returns an approximate solution to |
|
| 183 |
// |
|
| 184 |
// argmin_{alpha \in [0, maxAlpha]} phi(alpha).
|
|
| 185 |
// |
|
| 186 |
// To avoid getting stuck in local minima, the algorithm attempts to find a |
|
| 187 |
// point that satisfies the Wolfe conditions: |
|
| 188 |
// - Armijo condition: the function has decreased enough. This is controlled |
|
| 189 |
// by the "ftol" parameter. |
|
| 190 |
// - curvature condition: the function has flattened out enough. This is |
|
| 191 |
// controlled by the "gtol" parameter. |
|
| 192 |
// (It will succeed at this, unless machine precision gets in the way.) |
|
| 193 |
// |
|
| 194 |
// The basic idea of the algorithm is: |
|
| 195 |
// - do a crude search to find an interval that contains a local minimum |
|
| 196 |
// that satisfies the Wolfe conditions. |
|
| 197 |
// - do a bisection-like search on that interval, until a point is found |
|
| 198 |
// that satisfies the Wolfe conditions. |
|
| 199 |
// |
|
| 200 |
// In particular, the algorithm doesn't attempt to find a local minimizer. |
|
| 201 |
// It just tries to find a rough guess, which is much faster. The function |
|
| 202 |
// that calls linesearch will do it many times. Since the Wolfe conditions |
|
| 203 |
// are relative to the starting point of "linesearch", each subsequent call |
|
| 204 |
// will give tighter approximations. |
|
| 205 |
// |
|
| 206 |
// The algorithm will terminate if the objective eventually starts decreasing, |
|
| 207 |
// or a finite constraint on alpha is specified. |
|
| 208 |
// |
|
| 209 |
// Parameters: |
|
| 210 |
// - phi : R -> R is the objective function to minimize. |
|
| 211 |
// - phi_ : R -> R is the derivative of phi. |
|
| 212 |
// - alpha1 is the starting point. |
|
| 213 |
// - maxAlpha is an upper-bound constraint on alpha (can be Inf). |
|
| 214 |
// Bracketing phase of the linesearch (see the large comment block above
// for the algorithm description): starting from alpha1, either return an
// alpha satisfying both Wolfe conditions directly, or hand a bracketing
// interval over to zoom(). Returns 0.0 (via zoom) when precision is lost.
template <class Fun>
double
Bfgs<Fun>::Linesearch::operator()(double alpha1) const
{
    // initial check: the search direction must be a descent direction,
    // i.e. phi'(0) < 0; otherwise only warn and proceed
    if (! (phi_(0) < 0.0))
    {
        Rf_warning("\nBfgs: phi_(0) >= 0.0 in linesearch algorithm");
    }

    // initialize old and new alpha (clamp the start below the feasible
    // maximum imposed by the box constraints)
    double alpha_ = 0.0;
    double alpha = (alpha1 >= maxAlpha) ? maxAlpha / 2.0 : alpha1;

    // iterate at most 100 times
    for (int i = 0; i < 100; ++i)
    {
        if ((i > 0 && (phi(alpha) >= phi(alpha_))) || (! armijo(alpha)))
        {
            // sufficient decrease failed (or we got worse than the
            // previous trial): [alpha_, alpha] brackets a Wolfe point
            return zoom(alpha_, alpha);
        }
        else if (curvature(alpha))
        {
            // both Wolfe conditions hold: accept
            return alpha;
        }
        else if (phi_(alpha) >= 0.0)
        {
            // derivative turned non-negative: bracket in reverse order
            return zoom(alpha, alpha_);
        }
        else
        {
            // still descending: grow the step (bounded by maxAlpha)
            alpha_ = alpha;
            alpha = fmin((alpha + maxAlpha) / 2.0,
                         alpha * bfgs.stepsize);
        }
    }

    // return the last alpha if we did not break out of the loop beforehand.
    return alpha;
}
|
| 254 | ||
| 255 |
// ***************************************************************************************************// |
|
| 256 | ||
| 257 |
// the actual minimization function: |
|
| 258 |
// returns |
|
| 259 |
// -1 lost precision |
|
| 260 |
// 0 ok |
|
| 261 |
// 1 change not large enough |
|
| 262 |
// the actual minimization function:
// drives the quasi-Newton iteration: at each step take the search
// direction p = -H * f'(x) (H is the scalar inverse-Hessian estimate),
// do a Wolfe linesearch for the step length, then update H by a secant
// (finite-difference) approximation. Outputs are written into xMin and
// invHessMin.
// returns
// -1 lost precision
//  0 ok
//  1 change not large enough
template <class Fun>
int
Bfgs<Fun>::minimize(double x0, double& xMin, double& invHessMin)
{
    // first check that start value fulfills constraints.
    const bool insideBounds = (x0 >= lowerBound) && (x0 <= upperBound);
    if(! insideBounds)
    {
        Rf_error("Start value x0=%f for BFGS minimization not in admissible interval [%f, %f]", x0, lowerBound, upperBound);
    }

    // initialization: start at x0 with unit inverse-Hessian estimate
    xMin = x0;
    invHessMin = 1.0;

    int iter = 0;

    if(verbose)
    {
        Rprintf("\nBfgs: Starting BFGS minimization ...");
    }

    // as long as the gradient is far enough away from zero
    while (fabs(functionDeriv(xMin)) > precision)
    {
        ++iter;

        // possibly echo progress
        if (verbose)
        {
            Rprintf("\nBfgs: now at iteration %d", iter);
        }

        // minimize in the direction of p (quasi-Newton direction)
        double p = - invHessMin * functionDeriv(xMin);

        // linesearch for factor alpha, starting from alpha = 1,
        // with point x into direction p.
        // (the Linesearch constructor derives the maximum feasible alpha
        // from the box constraints)
        double alpha = Linesearch(*this, xMin, p)(1.0);

        // check result of linesearch:
        // have we lost precision?
        if (alpha == 0.0)
        {
            // then warn
            if(verbose)
            {
                Rprintf("\nBfgs: Lost precision in linesearch of iteration %d", iter);
                Rprintf("\nBfgs: Finished minimization.");
            }

            // and give back the current results and the error code
            return(-1);
        }
        else if ((2.0 * fabs(alpha * p)) < precision)
        {
            // also break if the change is not large enough.
            if (verbose)
            {
                Rprintf("\nBfgs: Change not large enough in iteration %d", iter);
                Rprintf("\nBfgs: Finished minimization.");
            }

            return(1);
        }

        // compute the new x
        double x_ = xMin + alpha * p;

        // inverted Hessian is numeric deriv of g at the new x_:
        // secant update, 1-D analogue of the BFGS inverse-Hessian update
        invHessMin = (x_ - xMin) / (functionDeriv(x_) - functionDeriv(xMin));

        // so now we are at x_
        xMin = x_;
    }

    if(verbose)
    {
        Rprintf("\nBfgs: Finished minimization.");
    }

    // return the convergence code
    return 0;
}
|
| 350 | ||
| 351 |
// ***************************************************************************************************// |
|
| 352 | ||
| 353 |
// does quadratic interpolation to solve for the minimum on [x0, x1] |
|
| 354 |
// f(x) = ax^2 + bx + c |
|
| 355 |
// |
|
| 356 |
// (1) f(x0) = f0 => f0 = a x0^2 + b x0 + c |
|
| 357 |
// (2) f(x1) = f1 => f1 = a x1^2 + b x1 + c |
|
| 358 |
// (3) f'(x0) = g0 => g0 = 2a x0 + b |
|
| 359 |
// |
|
| 360 |
// (1)-(2) f0 - f1 = a (x0^2 - x1^2) + b(x0 - x1) |
|
| 361 |
// from (3): b = g0 - 2a x0 |
|
| 362 |
// from (1)-(2): f0 - f1 = a (x0^2 - x1^2) + (g0 - 2a x0)(x0 - x1) |
|
| 363 |
// f0 - f1 - g0(x0 - x1) = a (x0^2 - x1^2) - 2a x0(x0 - x1) |
|
| 364 |
// f0 - f1 - g0(x0 - x1) = a [(x0^2 - x1^2) - 2 x0(x0 - x1)] |
|
| 365 |
// a = [f0 - f1 - g0(x0 - x1)] / [(x0^2 - x1^2) - 2 x0(x0 - x1)] |
|
| 366 |
// = - [f0 - f1 - g0(x0 - x1)] / [(x0 - x1)^2] |
|
| 367 |
// |
|
| 368 |
// Then, f'(x) = 2ax + b. The "minimum" is at x = - b / (2a). |
|
| 369 |
// (corrected here by DSB, it's not at x = -2a / b !!) |
|
| 370 |
// of course, it is only the minimum if f''(x) = 2a > 0, |
|
| 371 |
// which implies that f0 - f1 - g0(x0 - x1) < 0, |
|
| 372 |
// or in other words g0 < (f0 - f1) / (x0 - x1) if x0 < x1. |
|
| 373 |
template <class Fun> |
|
| 374 |
double |
|
| 375 | 666x |
Bfgs<Fun>::Linesearch::interpolate(double x0, |
| 376 |
double x1, |
|
| 377 |
double f0, |
|
| 378 |
double f1, |
|
| 379 |
double g0) const |
|
| 380 |
{
|
|
| 381 | 666x |
double dx = x0 - x1; |
| 382 | 666x |
double a = - (f0 - f1 - g0 * dx) / (dx * dx); |
| 383 | 666x |
double b = g0 - 2.0 * a * x0; |
| 384 | ||
| 385 | 666x |
double x = - b / (2.0 * a); |
| 386 | ||
| 387 |
// if it fails: return just the mid-point of [x0, x1] |
|
| 388 | 666x |
if ((R_finite(x) != 1) || (x < fmin(x0, x1)) || (x > fmax(x0, x1))) |
| 389 |
{
|
|
| 390 | 54x |
return (x0 + x1) / 2.0; |
| 391 |
} |
|
| 392 |
else |
|
| 393 |
{
|
|
| 394 | 612x |
return x; |
| 395 |
} |
|
| 396 |
} |
|
| 397 | ||
| 398 |
// ***************************************************************************************************// |
|
| 399 | ||
| 400 | ||
| 401 |
#endif /* BFGS_H_ */ |
| 1 |
/* |
|
| 2 |
* functionWraps.h |
|
| 3 |
* |
|
| 4 |
* Created on: 06.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef FUNCTIONWRAPS_H_ |
|
| 9 |
#define FUNCTIONWRAPS_H_ |
|
| 10 | ||
| 11 |
#include "rcppExport.h" |
|
| 12 |
#include "types.h" |
|
| 13 | ||
| 14 |
// ***************************************************************************************************// |
|
| 15 | ||
| 16 |
// Wrap a simple R function that takes a double argument and returns a
// double, so it can be used as a C++ function object.
class RFunction
{
public:
    // constructor: store the R function object (SEXP) in an Rcpp::Function
    RFunction(SEXP R_fun) :
        fun(R_fun)
    {}

    // // copy constructor
    // RFunction(const RFunction& old);
    //
    // // destructor
    // ~RFunction();
    //
    // // assignment operator
    // RFunction& operator=(const RFunction& rhs);

    // evaluate the wrapped R function at x
    // (defined out-of-line in the corresponding .cpp file)
    double
    operator()(double x) const;

private:
    // the underlying R function object
    Rcpp::Function fun;
};
|
| 42 | ||
| 43 |
// ***************************************************************************************************// |
|
| 44 | ||
| 45 |
// Wrap an R function that takes a numeric vector of fixed length.
class VectorRFunction
{
public:
    // constructor: store the R function object and the fixed argument size
    VectorRFunction(SEXP R_function, R_len_t argsize) :
        fun(R_function),
        argsize(argsize)
    {}

    // evaluate the wrapped R function on the data pointed to by vector
    // (presumably argsize elements are read — confirm in the .cpp definition)
    double
    operator()(const double* vector) const;

private:
    // the underlying R function object
    Rcpp::Function fun;

    // fixed length of the vector argument
    PosInt argsize;
};
|
| 63 | ||
| 64 |
// ***************************************************************************************************// |
|
| 65 | ||
| 66 |
// Simple function value cache: parallel vectors of arguments and the
// function values computed for them (vals[i] belongs to args[i]).
class Cache
{
public:
    // save one pair of argument and function value
    void
    save(double arg, double val);

    // extract (a copy of) the cached arguments
    MyDoubleVector
    getArgs() const
    {
        return args;
    }

    // extract (a copy of) the cached function values
    MyDoubleVector
    getVals() const
    {
        return vals;
    }

    // clear the cache
    void
    clear();

    // look up an already computed function value for arg
    // (callers test the result with R_IsNA, so a miss is signalled
    // by NA — see CachedFunction<Fun>::operator())
    double
    getValue(double arg) const;

    // initialize from an R list (counterpart of convert2list)
    Cache(Rcpp::List& rcpp_list);

    // default ctr is allowed as well (starts empty)
    Cache(){};

    // convert to an R list
    Rcpp::List
    convert2list() const;

private:
    // cached arguments
    MyDoubleVector args;

    // cached function values, parallel to args
    MyDoubleVector vals;
};
|
| 110 | ||
| 111 |
// ***************************************************************************************************// |
|
| 112 | ||
| 113 |
// Decorator that caches all results of a wrapped function object.
template<class Fun>
class CachedFunction
{
public:
    // constructor takes function object reference and stores it.
    CachedFunction(Fun& function) :
        function(function)
    {
    }

    // return (a copy of) the cache accumulated so far
    Cache
    getCache() const
    {
        return cache;
    }

    // evaluate at x, consulting the cache first
    // (non-const: a cache miss inserts the newly computed value)
    double
    operator()(double x);

private:
    // the wrapped function object
    Fun& function;

    // cache of argument/value pairs
    Cache cache;
};
|
| 139 | ||
| 140 |
// ***************************************************************************************************// |
|
| 141 | ||
| 142 |
// Numeric first derivative of a function object
// (simple forward-difference scheme, see operator() definition).
template<class Fun>
class NumericDerivative
{
public:
    // save the function (object) reference
    NumericDerivative(Fun& function) :
        function(function)
    {
    }

    // evaluate the derivative estimate at x
    double
    operator()(double x) const;

private:
    // the wrapped function object
    Fun& function;
};
|
| 160 | ||
| 161 |
// ***************************************************************************************************// |
|
| 162 | ||
| 163 |
// More accurate numeric first derivative of a function object,
// via centered differences extrapolated to stepsize zero
// (Neville tableau, see operator() definition).
template<class Fun>
class AccurateNumericDerivative
{
public:
    // save the function (object) reference and the initial stepsize eps;
    // eps must be strictly positive, otherwise an R error is raised
    AccurateNumericDerivative(Fun& function,
                              double eps = 0.1) :
        function(function),
        h(eps),
        ntab(10),
        con(1.4),
        con2(con * con),
        big(DBL_MAX),
        safe(2.0)
    {
        if(eps <= 0)
        {
            Rf_error("eps must be positive in AccurateNumericDerivative");
        }
    }

    // evaluate the derivative estimate at x
    double
    operator()(double x) const;

private:
    // the wrapped function object
    Fun& function;

    // initial stepsize
    const double h;

    // sets maximum size of tableau
    const int ntab;

    // stepsize is decreased by factor con at each iteration
    const double con;
    const double con2;

    // initial (worst-case) error estimate
    const double big;

    // return when error is safe worse than the best so far
    const double safe;
};
|
| 204 | ||
| 205 | ||
| 206 |
// ***************************************************************************************************// |
|
| 207 | ||
| 208 |
// Numeric inverse second derivative (1-dimensional inverse Hessian)
// of a function object.
template<class Fun>
class NumericInvHessian
{
public:
    // save the function (object) reference and the epsilon parameter
    // used as relative finite-difference stepsize
    NumericInvHessian(Fun& function,
                      double eps = EPS) :
        function(function),
        eps(eps)
    {
    }

    // evaluate the inverse Hessian estimate at x
    double
    operator()(double x) const;

private:
    // the wrapped function object
    Fun& function;

    // relative stepsize for the finite differences
    const double eps;
};
|
| 229 | ||
| 230 |
// ***************************************************************************************************// |
|
| 231 | ||
| 232 |
// More accurate numeric inverse second derivative of a function object,
// obtained by nesting two AccurateNumericDerivative objects
// (see operator() definition).
template<class Fun>
class AccurateNumericInvHessian
{
public:
    // save the function (object) reference
    AccurateNumericInvHessian(Fun& function) :
        function(function)
    {
    }

    // evaluate the inverse Hessian estimate at x
    double
    operator()(double x) const;

private:
    // the wrapped function object
    Fun& function;
};
|
| 250 |
// ***************************************************************************************************// |
|
| 251 | ||
| 252 |
// for use as a function |
|
| 253 |
template <class Fun> |
|
| 254 |
double |
|
| 255 | 1339074x |
CachedFunction<Fun>::operator()(double x) |
| 256 |
{
|
|
| 257 | 1339074x |
double ret = cache.getValue(x); |
| 258 | ||
| 259 |
// if not found in the old arguments |
|
| 260 | 1339074x |
if(R_IsNA(ret)) |
| 261 |
{
|
|
| 262 |
// we have to compute and save it. |
|
| 263 | 869442x |
ret = function(x); |
| 264 | 869442x |
cache.save(x, ret); |
| 265 |
} |
|
| 266 | ||
| 267 |
// finally return the value (either found or newly computed) |
|
| 268 | 1339074x |
return ret; |
| 269 |
} |
|
| 270 | ||
| 271 |
// ***************************************************************************************************// |
|
| 272 | ||
| 273 |
// for use as a function |
|
| 274 |
template <class Fun> |
|
| 275 |
double |
|
| 276 | 1662x |
NumericDerivative<Fun>::operator()(double x) const |
| 277 |
{
|
|
| 278 |
// if abs(x) is (almost) zero, take delta = eps; otherwise delta = x*eps. |
|
| 279 |
// (important here: use of fabs instead of abs, which is an integer [!] version) |
|
| 280 | 1662x |
double delta = (fabs(x) == 0.0) ? EPS : x * EPS; |
| 281 | ||
| 282 |
// return the estimated derivative at x: |
|
| 283 | 1662x |
return (function(x + delta) - function(x)) / delta; |
| 284 |
} |
|
| 285 | ||
| 286 |
// ***************************************************************************************************// |
|
| 287 | ||
| 288 |
// Extrapolated centered-difference estimate of the first derivative at x:
// centered differences at successively smaller stepsizes are combined in a
// Neville tableau, and the entry with the smallest estimated error is kept.
template <class Fun>
double
AccurateNumericDerivative<Fun>::operator()(double x) const
{
    // modified from
    // http://www.koders.com/cpp/fid192DE24D3250B7B3632C6F54490A67268C992E28.aspx?s=Chebyshev
    // (Numerical Recipes)

    double hh = h;
    double err = big;

    // ntab x ntab Neville tableau of extrapolation estimates
    std::vector<MyDoubleVector> a (ntab, MyDoubleVector (ntab, 0));

    // centered difference at the initial stepsize
    a[0][0] = (function(x + hh) - function(x - hh)) / (2.0 * hh);

    double answer = a[0][0];

    for (int i = 1; i < ntab; i++)
    {
        // Successive columns in the Neville tableau will go to smaller
        // stepsizes and higher orders of extrapolation
        hh /= con;

        // Try new smaller stepsize:
        a[0][i] = (function(x + hh) - function(x - hh)) / (2.0 * hh);

        double fac = con2;

        // Compute extrapolation of various orders, requiring no new function evaluations
        for (int j = 1; j <= i; j++)
        {
            a[j][i] = (a[j-1][i] * fac - a[j-1][i-1]) / (fac - 1.0);

            fac = con2 * fac;

            // The error strategy is to compare each new extrapolation to one
            // order lower, both at the present stepsize and the previous one.
            double errt = fmax(fabs(a[j][i] - a[j-1][i]),
                               fabs(a[j][i] - a[j-1][i-1]));

            // If error is decreased, save the improved answer.
            if (errt <= err)
            {
                err = errt;
                answer = a[j][i];
            }
        }

        // If higher order is worse by a significant factor safe, then quit early.
        if (fabs(a[i][i] - a[i-1][i-1]) >= safe * err)
            break;
    }

    return answer;
}
|
| 345 | ||
| 346 |
// ***************************************************************************************************// |
|
| 347 | ||
| 348 |
// for use as a function |
|
| 349 |
template <class Fun> |
|
| 350 |
double |
|
| 351 |
NumericInvHessian<Fun>::operator()(double x) const |
|
| 352 |
{
|
|
| 353 | ||
| 354 |
// if abs(x) is (almost) zero, take delta = eps; otherwise delta = x * eps. |
|
| 355 |
// (important here: use of fabs instead of abs, which is an integer [!] version) |
|
| 356 |
double delta = (fabs(x) == 0.0) ? eps : x * eps; |
|
| 357 |
delta = fabs(delta); |
|
| 358 | ||
| 359 |
// ensure that the difference between x and x + delta is exactly delta: |
|
| 360 |
double volatile temp = x + delta; |
|
| 361 |
delta = temp - x; |
|
| 362 | ||
| 363 |
// return the estimated inverse hessian at x: |
|
| 364 |
// (this is the analytical simplification of the centered differences formula, using the same |
|
| 365 |
// delta for the first and the second numerical derivative) |
|
| 366 |
// cf. Abramowitz/Stegun p. 884, formula 25.3.24. |
|
| 367 |
double forward = function(x + delta) - function(x); |
|
| 368 |
double backward = function(x) - function(x - delta); |
|
| 369 |
double ret = (delta * delta) / (forward - backward); |
|
| 370 | ||
| 371 |
return ret; |
|
| 372 |
} |
|
| 373 | ||
| 374 |
// ***************************************************************************************************// |
|
| 375 | ||
| 376 |
// for use as a function |
|
| 377 |
template <class Fun> |
|
| 378 |
double |
|
| 379 | 6849x |
AccurateNumericInvHessian<Fun>::operator()(double x) const |
| 380 |
{
|
|
| 381 |
// get the accurate derivative of the function |
|
| 382 | 6849x |
AccurateNumericDerivative<Fun> firstDerivative(function); |
| 383 | ||
| 384 |
// and the accurate derivative of the first derivative |
|
| 385 | 6849x |
AccurateNumericDerivative<AccurateNumericDerivative<Fun> > secondDerivative(firstDerivative); |
| 386 | ||
| 387 |
// so the result is |
|
| 388 | 6849x |
double ret = 1.0 / secondDerivative(x); |
| 389 | ||
| 390 |
// return that |
|
| 391 | 6849x |
return ret; |
| 392 |
} |
|
| 393 | ||
| 394 |
// ***************************************************************************************************// |
|
| 395 | ||
| 396 |
#endif /* FUNCTIONWRAPS_H_ */ |
| 1 |
// Modified by Isaac Gravestock (isaac.gravestock@uzh.ch) |
|
| 2 |
// |
|
| 3 |
// 13/07/2015: Update code to be compatible with CRAN |
|
| 4 |
// Replace cout and exit |
|
| 5 |
// |
|
| 6 | ||
| 7 |
#include "combinatorics.h" |
|
| 8 |
#include <iostream> |
|
| 9 |
#include "types.h" |
|
| 10 |
#include <cstdlib> |
|
| 11 |
#include <Rcpp.h> |
|
| 12 | ||
| 13 |
//using std::cout; |
|
| 14 |
using namespace Rcpp; |
|
| 15 | ||
| 16 | 1134x |
void ksub_next ( int n, int k, IntVector& a, bool *more, int &m, int &m2)

//****************************************************************************80
//
//  Purpose:
//
//    KSUB_NEXT generates the subsets of size K from a set of size N,
//    one subset per call.
//
//  Modified:
//
//    29 May 2003
//
//  Reference:
//
//    Albert Nijenhuis, Herbert Wilf,
//    Combinatorial Algorithms,
//    Academic Press, 1978, second edition,
//    ISBN 0-12-519260-6.
//
//  Parameters:
//
//    Input, int N, the size of the set from which subsets are drawn.
//
//    Input, int K, the desired size of the subsets.  K must
//    be between 0 and N.
//
//    Output, IntVector& A.  A[I] is the I-th element of the
//    subset.  Thus A[I] will be an integer between 1 and N.
//    Note that the routine will return the values in A
//    in sorted order: 1 <= A[0] < A[1] < ... < A[K-1] <= N
//
//    Input/output, bool *MORE.  Set MORE = FALSE before first call
//    for a new sequence of subsets.  It then is set and remains
//    TRUE as long as the subset computed on this call is not the
//    final one.  When the final subset is computed, MORE is set to
//    FALSE as a signal that the computation is done.
//
//    Input/output, int &M, int &M2: internal state carried between
//    successive calls; they are (re)initialized here when *MORE is FALSE.
//
{
  int j;

  // validate 0 <= K <= N; report and raise an R-level error otherwise
  if ( k < 0 || n < k )
  {
    Rcout << "\n";
    Rcout << "KSUB_NEXT - Fatal error!\n";
    Rcout << "N = " << n << "\n";
    Rcout << "K = " << k << "\n";
    Rcout << "but 0 <= K <= N is required!\n";
    //exit ( 1 );
    stop("Invalid parameters");
  }

  if ( !( *more ) )
  {
    // first call of a new sequence: reset the state so that the
    // initial subset (1, 2, ..., k) is produced below
    m2 = 0;
    m = k;
  }
  else
  {
    // advance the state: m counts how many trailing elements
    // must be rewritten, starting after the pivot value m2
    if ( m2 < n-m )
    {
      m = 0;
    }
    m = m + 1;
    m2 = a[k-m];
  }

  // rewrite the last m elements as consecutive values after m2
  for ( j = 1; j <= m; j++ )
  {
    a[k+j-m-1] = m2 + j;
  }

  // the final subset is the one starting at n-k+1
  *more = ( a[0] != (n-k+1) );

  return;
}
|
| 93 | ||
| 94 |
// ****************************************************************************************// |
|
| 95 | ||
| 96 | ! |
void comp_next ( int n, int k, IntVector& a, bool *more, int &h, int &t)

//****************************************************************************80
//
//  Purpose:
//
//    COMP_NEXT computes the compositions of the integer N into K parts,
//    one composition per call.
//
//  Discussion:
//
//    The routine computes one composition on each call until there are no more.
//    For instance, one composition of 6 into 3 parts is
//    3+2+1, another would be 6+0+0.
//
//  Example:
//
//    The 28 compositions of 6 into three parts are:
//
//      6 0 0,  5 1 0,  5 0 1,  4 2 0,  4 1 1,  4 0 2,
//      3 3 0,  3 2 1,  3 1 2,  3 0 3,  2 4 0,  2 3 1,
//      2 2 2,  2 1 3,  2 0 4,  1 5 0,  1 4 1,  1 3 2,
//      1 2 3,  1 1 4,  1 0 5,  0 6 0,  0 5 1,  0 4 2,
//      0 3 3,  0 2 4,  0 1 5,  0 0 6.
//
//  Modified:
//
//    28 May 2003
//
//  Author:
//
//    Albert Nijenhuis, Herbert Wilf,
//
//    C++ translation by John Burkardt.
//
//  Reference:
//
//    Albert Nijenhuis, Herbert Wilf,
//    Combinatorial Algorithms,
//    Academic Press, 1978, second edition,
//    ISBN 0-12-519260-6.
//
//  Parameters:
//
//    Input, int N, the integer whose compositions are desired.
//
//    Input, int K, the number of parts in the composition.
//
//    Input/output, IntVector& A, the parts of the composition.
//
//    Input/output, bool *MORE.
//    Set MORE = FALSE on first call.  It will be reset to TRUE on return
//    with a new composition.  Each new call returns another composition until
//    MORE is set to FALSE when the last composition has been computed
//    and returned.
//
//    Input/output, int &H, int &T: internal state carried between
//    successive calls; they are (re)initialized here when *MORE is FALSE.
//
{

  int i;

  if ( ! ( *more ) )
  {
    // first call: start with the composition (n, 0, ..., 0)
    t = n;
    h = 0;
    a[0] = n;
    for ( i = 1; i < k; i++ )
    {
      a[i] = 0;
    }
  }
  else
  {
    // advance to the next composition: h indexes the part being
    // incremented, t remembers the previously emptied part
    if ( 1 < t )
    {
      h = 0;
    }

    h = h + 1;
    t = a[h-1];
    a[h-1] = 0;
    a[0] = t - 1;
    a[h] = a[h] + 1;

  }

  // the last composition is (0, ..., 0, n)
  *more = ( a[k-1] != n );

  return;
}
| 1 |
/* |
|
| 2 |
* coxfit.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 30.11.2012 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* Modified from R's internal cox fit function for Efron / Breslow ties method, |
|
| 8 |
* R-2.15.2/src/library/Recommended/survival/src/coxfit6.c |
|
| 9 |
* Three other functions related to Cholesky factorization and solving are |
|
| 10 |
* also taken from the R sources. |
|
| 11 |
* |
|
| 12 |
*/ |
|
| 13 | ||
| 14 |
#include "rcppExport.h" |
|
| 15 |
#include "types.h" |
|
| 16 |
#include "coxfit.h" |
|
| 17 | ||
| 18 |
using namespace Rcpp; |
|
| 19 | ||
| 20 |
/* |
|
| 21 |
** A very few pathologic cases can cause the Newton Raphson iteration |
|
| 22 |
** path in coxph to generate a horrific argument to exp(). Since all these |
|
| 23 |
** calls to exp result in (essentially) relative risks we choose a |
|
| 24 |
** fixed value of LARGE on biological grounds: any number less than |
|
| 25 |
** 1/(population of the earth) is essentially a zero, that is, an exponent |
|
| 26 |
** outside the range of +-23. |
|
| 27 |
** A sensible numeric limit would be log(.Machine$double.xmax) which is |
|
| 28 |
** about 700, perhaps divided by 2 or log(n) to keep a few more bits. |
|
| 29 |
** However, passing this down the R calling chain to the c-routine is a lot |
|
| 30 |
** more hassle than I want to implement for this very rare case. |
|
| 31 |
** |
|
| 32 |
** Actually, the argument does not have to get large enough to have any |
|
| 33 |
** single exponential overflow. In (start, stop] data we keep a running |
|
| 34 |
** sum of scores exp(x[i]*beta), which involves both adding subjects in and |
|
| 35 |
** subtracting them out. An outlier x value that enters and then leaves can |
|
| 36 |
** erase all the digits of accuracy. Most machines have about 16 digits of |
|
| 37 |
** accuracy and exp(21) uses up about 9 of them, leaving enough that the |
|
| 38 |
** routine doesn't fall on it's face. (A user data set with outlier that |
|
| 39 |
** got exp(54) and a overlarge first beta on the first iteration led to this |
|
| 40 |
** paragraph.) When beta-hat is infinite and x well behaved, the loglik |
|
| 41 |
** usually converges before xbeta gets to 15, so this protection should not |
|
| 42 |
** harm the iteration path of even edge cases; only fix those that truely |
|
| 43 |
** go astray. |
|
| 44 |
** |
|
| 45 |
** The truncation turns out not to be necessary for small values, since a risk |
|
| 46 |
** score of exp(-50) or exp(-1000) or 0 all give essentially the same effect. |
|
| 47 |
** We only cut these off enough to avoid underflow. |
|
| 48 |
*/ |
|
| 49 | ||
| 50 |
// Bounds for linear predictors before exponentiation: exp(22) is safely
// representable, and exp(-200) is indistinguishable from zero for relative
// risks (see the long rationale comment above).
#define LARGE 22
#define SMALL -200

// Clamp a linear predictor x to [SMALL, LARGE] so that the subsequent
// exp(x) can neither overflow nor lose all accuracy (see comment above).
// Returns x unchanged when it already lies inside the interval.
double coxsafe(double x) {
    if (x < SMALL) {
        return SMALL;
    }
    if (x > LARGE) {
        return LARGE;
    }
    return x;
}
|
| 58 | ||
| 59 | ||
| 60 | ||
| 61 | ||
| 62 |
/* $Id: cholesky2.c 11357 2009-09-04 15:22:46Z therneau $ |
|
| 63 |
** |
|
| 64 |
** subroutine to do Cholesky decompostion on a matrix: C = FDF' |
|
| 65 |
** where F is lower triangular with 1's on the diagonal, and D is diagonal |
|
| 66 |
** |
|
| 67 |
** arguments are: |
|
| 68 |
** n the size of the matrix to be factored |
|
| 69 |
** **matrix a ragged array containing an n by n submatrix to be factored |
|
| 70 |
** toler the threshold value for detecting "singularity" |
|
| 71 |
** |
|
| 72 |
** The factorization is returned in the lower triangle, D occupies the |
|
| 73 |
** diagonal and the upper triangle is left undisturbed. |
|
| 74 |
** The lower triangle need not be filled in at the start. |
|
| 75 |
** |
|
| 76 |
** Return value: the rank of the matrix (non-negative definite), or -rank |
|
| 77 |
** it not SPD or NND |
|
| 78 |
** |
|
| 79 |
** If a column is deemed to be redundant, then that diagonal is set to zero. |
|
| 80 |
** |
|
| 81 |
** Terry Therneau |
|
| 82 |
*/ |
|
| 83 | ||
| 84 | 129115x |
// Cholesky decomposition C = FDF' of the n x n submatrix of `matrix`
// (F lower triangular with unit diagonal, D diagonal); the factorization
// overwrites the lower triangle, D occupies the diagonal, and the upper
// triangle is left undisturbed.  `toler` is the relative threshold for
// declaring a pivot "singular".  Returns the rank, negated if the matrix
// was found not to be SPD/non-negative definite.  (Terry Therneau's
// cholesky2.c from the survival package — see header comment above.)
int cholesky2(DoubleMatrix& matrix, int n, double toler)
{
    double temp;
    int i,j,k;
    double eps, pivot;
    int rank;
    int nonneg;

    nonneg=1;
    // find the largest diagonal element for the relative threshold,
    // and mirror the upper triangle into the (work) lower triangle
    eps =0;
    for (i=0; i<n; i++) {
        if (matrix[i][i] > eps)  eps = matrix[i][i];
        for (j=(i+1); j<n; j++)  matrix[j][i] = matrix[i][j];
    }
    // absolute singularity threshold
    eps *= toler;

    rank =0;
    for (i=0; i<n; i++) {
        pivot = matrix[i][i];
        if (pivot < eps) {
            // redundant column: zero the diagonal; a clearly negative
            // pivot marks the matrix as not non-negative definite
            matrix[i][i] =0;
            if (pivot < -8*eps) nonneg= -1;
        }
        else  {
            rank++;
            // eliminate column i from the trailing submatrix
            for (j=(i+1); j<n; j++) {
                temp = matrix[j][i]/pivot;
                matrix[j][i] = temp;
                matrix[j][j] -= temp*temp*pivot;
                for (k=(j+1); k<n; k++) matrix[k][j] -= temp*matrix[k][i];
            }
        }
    }
    return(rank * nonneg);
}
|
| 119 | ||
| 120 |
/* $Id: chinv2.c 11357 2009-09-04 15:22:46Z therneau $ |
|
| 121 |
** |
|
| 122 |
** matrix inversion, given the FDF' cholesky decomposition |
|
| 123 |
** |
|
| 124 |
** input **matrix, which contains the chol decomp of an n by n |
|
| 125 |
** matrix in its lower triangle. |
|
| 126 |
** |
|
| 127 |
** returned: the upper triangle + diagonal contain (FDF')^{-1}
|
|
| 128 |
** below the diagonal will be F inverse |
|
| 129 |
** |
|
| 130 |
** Terry Therneau |
|
| 131 |
*/ |
|
| 132 | ||
| 133 | 25160x |
// Matrix inversion given the FDF' Cholesky decomposition (as produced by
// cholesky2) in the lower triangle of `matrix`.  On return the upper
// triangle + diagonal contain (FDF')^{-1}; below the diagonal is F inverse.
// (Terry Therneau's chinv2.c from the survival package.)
void chinv2(DoubleMatrix& matrix , int n)
{
    double temp;
    int i,j,k;

    /*
    ** invert the cholesky in the lower triangle
    ** take full advantage of the cholesky's diagonal of 1's
    */
    for (i=0; i<n; i++){
        if (matrix[i][i] >0) {
            matrix[i][i] = 1/matrix[i][i];   /*this line inverts D */
            for (j= (i+1); j<n; j++) {
                matrix[j][i] = -matrix[j][i];
                for (k=0; k<i; k++)     /*sweep operator */
                    matrix[j][k] += matrix[j][i]*matrix[i][k];
            }
        }
        // a zero diagonal (singular column, zeroed by cholesky2)
        // is simply skipped here and handled in the second pass
    }

    /*
    ** lower triangle now contains inverse of cholesky
    ** calculate F'DF (inverse of cholesky decomp process) to get inverse
    ** of original matrix
    */
    for (i=0; i<n; i++) {
        if (matrix[i][i]==0) {  /* singular row */
            for (j=0; j<i; j++) matrix[j][i]=0;
            for (j=i; j<n; j++) matrix[i][j]=0;
        }
        else {
            for (j=(i+1); j<n; j++) {
                temp = matrix[j][i]*matrix[j][j];
                if (j!=i) matrix[i][j] = temp;
                for (k=i; k<j; k++)
                    matrix[i][k] += temp*matrix[j][k];
            }
        }
    }
}
|
| 173 | ||
| 174 |
/* $Id: chsolve2.c 11376 2009-12-14 22:53:57Z therneau $ |
|
| 175 |
** |
|
| 176 |
** Solve the equation Ab = y, where the cholesky decomposition of A and y |
|
| 177 |
** are the inputs. |
|
| 178 |
** |
|
| 179 |
** Input **matrix, which contains the chol decomp of an n by n |
|
| 180 |
** matrix in its lower triangle. |
|
| 181 |
** y[n] contains the right hand side |
|
| 182 |
** |
|
| 183 |
** y is overwriten with b |
|
| 184 |
** |
|
| 185 |
** Terry Therneau |
|
| 186 |
*/ |
|
| 187 | ||
| 188 | 102289x |
// Solve Ab = y given the FDF' Cholesky decomposition of A (as produced by
// cholesky2) in the lower triangle of `matrix`; y holds the right hand side
// on entry and is overwritten with the solution b.
// (Terry Therneau's chsolve2.c from the survival package.)
void chsolve2(const DoubleMatrix& matrix, int n, AVector& y)
{
    int i,j;
    double temp;

    /*
    ** solve Fb = y  (forward substitution; F has unit diagonal)
    */
    for (i=0; i<n; i++) {
        temp = y[i] ;
        for (j=0; j<i; j++)
            temp -= y[j] * matrix[i][j] ;
        y[i] = temp ;
    }
    /*
    ** solve DF'z = b  (scale by D, then backward substitution)
    */
    for (i=(n-1); i>=0; i--) {
        // a zero diagonal marks a singular column: force the solution to 0
        if (matrix[i][i]==0)  y[i] =0;
        else {
            temp = y[i]/matrix[i][i];
            for (j= i+1; j<n; j++)
                temp -= y[j]*matrix[j][i];
            y[i] = temp;
        }
    }
}
|
| 215 | ||
| 216 | ||
| 217 | ||
| 218 | ||
| 219 |
// getter for results |
|
| 220 |
CoxfitResults |
|
| 221 | 25160x |
Coxfit::finalizeAndGetResults() |
| 222 |
{
|
|
| 223 |
// copy the contents of the DoubleMatrix imat into |
|
| 224 |
// the Armadillo matrix results.imat |
|
| 225 | 104533x |
for(int i = 0; i < nCovs; ++i) |
| 226 |
{
|
|
| 227 | 381888x |
for(int j = 0; j < nCovs; ++j) |
| 228 |
{
|
|
| 229 | 605030x |
results.imat.at(i, j) = imat[i][j]; |
| 230 |
} |
|
| 231 |
} |
|
| 232 |
// the other results are already up to date, because |
|
| 233 |
// their storage is directly used in the fit function. |
|
| 234 | ||
| 235 |
// then return the results |
|
| 236 | 25160x |
return results; |
| 237 |
} |
|
| 238 | ||
| 239 | ||
| 240 |
// Check the fit results: raise an R error for a singular model or
// non-convergence, and warn if individual coefficients look divergent.
void
Coxfit::checkResults() const
{
    // todo: replace errors by exceptions later on

    // flag below nCovs indicates a rank-deficient information matrix
    // (presumably the rank returned by the Cholesky factorization —
    // confirm in the fit function)
    if(results.flag < nCovs)
    {
        Rf_error("Singular model!");
    }

    // Newton increments for the coefficients
    AVector infs = results.imat * results.u;

    if(iterMax > 1)
    {
        // flag == 1000 signals that the iteration limit was exhausted
        if (results.flag == 1000)
        {
            Rf_error("Ran out of iterations and did not converge");
        }
        else
        {
            // collect 1-based indices of coefficients whose Newton increment
            // is still large both absolutely and relative to the coefficient
            IntVector notConverged;

            for(int i=0; i < nCovs; ++i)
            {
                if((infs[i] > eps) && (infs[i] > tolerInf * fabs(results.coefs[i])))
                {
                    notConverged.push_back(i + 1);
                }
            }

            if(notConverged.size() > 0)
            {
                Rf_warning("Loglik converged before some variables; beta may be infinite. ");
            }
        }
    }
}
|
| 278 | ||
| 279 | ||
| 280 |
// the fit function, returns the number of |
|
| 281 |
// required iterations |
|
| 282 |
int |
|
| 283 | 25160x |
Coxfit::fit() |
| 284 |
{
|
|
| 285 |
int i,j,k, person; |
|
| 286 | ||
| 287 |
double wtave; |
|
| 288 | 25160x |
double denom=0, zbeta, risk; |
| 289 |
double temp, temp2; |
|
| 290 |
int ndead; /* actually, the sum of their weights */ |
|
| 291 | 25160x |
double newlk = 0.0; |
| 292 |
double dtime, d2; |
|
| 293 |
double deadwt; /*sum of case weights for the deaths*/ |
|
| 294 |
double efronwt; /* sum of weighted risk scores for the deaths*/ |
|
| 295 |
int halving; /*are we doing step halving at the moment? */ |
|
| 296 |
int nrisk; /* number of subjects in the current risk set */ |
|
| 297 | ||
| 298 |
/* copies of scalar input arguments */ |
|
| 299 |
int nused, nvar, maxiter; |
|
| 300 |
double toler; |
|
| 301 |
int doscale; |
|
| 302 | ||
| 303 |
/* returned objects */ |
|
| 304 |
int iter; |
|
| 305 | ||
| 306 |
/* get local copies of some input args */ |
|
| 307 | 25160x |
nused = nObs; |
| 308 | 25160x |
nvar = nCovs; |
| 309 | 25160x |
maxiter = iterMax; |
| 310 | 25160x |
toler = tolerChol; /* tolerance for cholesky */ |
| 311 | 25160x |
doscale = 1; |
| 312 | ||
| 313 |
/* |
|
| 314 |
** Set up the ragged arrays and scratch space |
|
| 315 |
*/ |
|
| 316 | ||
| 317 | 25160x |
AVector a(nvar); |
| 318 | 25160x |
AVector newbeta(nvar); |
| 319 | 25160x |
AVector a2(nvar); |
| 320 | 25160x |
AVector scale(nvar); |
| 321 | ||
| 322 | 25160x |
DoubleMatrix cmat(nvar, nvar); |
| 323 | 25160x |
DoubleMatrix cmat2(nvar, nvar); |
| 324 | ||
| 325 |
/* |
|
| 326 |
** Subtract the mean from each covar, as this makes the regression |
|
| 327 |
** much more stable. |
|
| 328 |
*/ |
|
| 329 | 104533x |
for (i=0; i<nvar; i++) {
|
| 330 | 79373x |
temp=0; |
| 331 | 10952216x |
for (person=0; person<nused; person++) temp += X(person, i); |
| 332 | 79373x |
temp /= nused; |
| 333 | 10952216x |
for (person=0; person<nused; person++) |
| 334 |
{
|
|
| 335 | 21745686x |
X(person, i) -=temp; |
| 336 |
} |
|
| 337 | 79373x |
if (doscale==1) { /* and also scale it */
|
| 338 | 79373x |
temp =0; |
| 339 | 10952216x |
for (person=0; person<nused; person++) {
|
| 340 | 21745686x |
temp += fabs(X(person, i)); |
| 341 |
} |
|
| 342 | 79373x |
if (temp > 0) temp = nused/temp; /* scaling */ |
| 343 | ! |
else temp=1.0; /* rare case of a constant covariate */ |
| 344 | 79373x |
scale[i] = temp; |
| 345 | 10952216x |
for (person=0; person<nused; person++) |
| 346 |
{
|
|
| 347 | 21745686x |
X(person, i) *= temp; |
| 348 |
} |
|
| 349 |
} |
|
| 350 |
} |
|
| 351 | 25160x |
if (doscale==1) {
|
| 352 | 183906x |
for (i=0; i<nvar; i++) results.coefs[i] /= scale[i]; /*rescale initial betas */ |
| 353 |
} |
|
| 354 |
else {
|
|
| 355 | ! |
for (i=0; i<nvar; i++) scale[i] = 1.0; |
| 356 |
} |
|
| 357 | ||
| 358 |
/* |
|
| 359 |
** do the initial iteration step |
|
| 360 |
*/ |
|
| 361 | 25160x |
strata[nused-1] =1; |
| 362 | 25160x |
results.loglik[1] =0; |
| 363 | 104533x |
for (i=0; i<nvar; i++) {
|
| 364 | 79373x |
results.u[i] =0; |
| 365 | 79373x |
a2[i] =0; |
| 366 | 381888x |
for (j=0; j<nvar; j++) {
|
| 367 | 302515x |
imat[i][j] =0 ; |
| 368 | 302515x |
cmat2[i][j] =0; |
| 369 |
} |
|
| 370 |
} |
|
| 371 | ||
| 372 | 2566303x |
for (person=nused-1; person>=0; ) {
|
| 373 | 2541143x |
if (strata[person] == 1) {
|
| 374 | 25160x |
nrisk =0 ; |
| 375 | 25160x |
denom = 0; |
| 376 | 104533x |
for (i=0; i<nvar; i++) {
|
| 377 | 79373x |
a[i] = 0; |
| 378 | 381888x |
for (j=0; j<nvar; j++) cmat[i][j] = 0; |
| 379 |
} |
|
| 380 |
} |
|
| 381 | ||
| 382 | 2541143x |
dtime = survTimes[person]; |
| 383 | 2541143x |
ndead =0; /*number of deaths at this time point */ |
| 384 | 2541143x |
deadwt =0; /* sum of weights for the deaths */ |
| 385 | 2541143x |
efronwt=0; /* sum of weighted risks for the deaths */ |
| 386 | 11949708x |
while(person >=0 &&survTimes[person]==dtime) {
|
| 387 |
/* walk through the this set of tied times */ |
|
| 388 | 3446291x |
nrisk++; |
| 389 | 3446291x |
zbeta = offsets[person]; /* form the term beta*z (vector mult) */ |
| 390 | 14319134x |
for (i=0; i<nvar; i++) |
| 391 | 32618529x |
zbeta += results.coefs[i]*X(person, i); |
| 392 | 3446291x |
zbeta = coxsafe(zbeta); |
| 393 | 3446291x |
risk = exp(zbeta) * weights[person]; |
| 394 | 3446291x |
denom += risk; |
| 395 | ||
| 396 |
/* a is the vector of weighted sums of x, cmat sums of squares */ |
|
| 397 | 14319134x |
for (i=0; i<nvar; i++) {
|
| 398 | 21745686x |
a[i] += risk*X(person, i); |
| 399 | 37030284x |
for (j=0; j<=i; j++) |
| 400 | 78472323x |
cmat[i][j] += risk * X(person, i) * X(person, j); |
| 401 |
} |
|
| 402 | ||
| 403 | 3446291x |
if (censInd[person]==1) {
|
| 404 | 3219290x |
ndead++; |
| 405 | 3219290x |
deadwt += weights[person]; |
| 406 | 3219290x |
efronwt += risk; |
| 407 | 6438580x |
results.loglik[1] += weights[person]*zbeta; |
| 408 | ||
| 409 | 13376654x |
for (i=0; i<nvar; i++) |
| 410 | 40629456x |
results.u[i] += weights[person]*X(person, i); |
| 411 | 3219290x |
if (method==1) { /* Efron */
|
| 412 | 13376654x |
for (i=0; i<nvar; i++) {
|
| 413 | 20314728x |
a2[i] += risk*X(person, i); |
| 414 | 34594626x |
for (j=0; j<=i; j++) |
| 415 | 73311786x |
cmat2[i][j] += risk* X(person, i) * X(person, j); |
| 416 |
} |
|
| 417 |
} |
|
| 418 |
} |
|
| 419 | ||
| 420 | 3446291x |
person--; |
| 421 |
if (strata[person]==1) break; /*ties don't cross strata */ |
|
| 422 |
} |
|
| 423 | ||
| 424 | ||
| 425 | 2541143x |
if (ndead >0) { /* we need to add to the main terms */
|
| 426 | 2439857x |
if (method==0) { /* Breslow */
|
| 427 | ! |
results.loglik[1] -= deadwt* log(denom); |
| 428 | ||
| 429 | ! |
for (i=0; i<nvar; i++) {
|
| 430 | ! |
temp2= a[i]/ denom; /* mean */ |
| 431 | ! |
results.u[i] -= deadwt* temp2; |
| 432 | ! |
for (j=0; j<=i; j++) |
| 433 | ! |
imat[j][i] += deadwt*(cmat[i][j] - temp2*a[j])/denom; |
| 434 |
} |
|
| 435 |
} |
|
| 436 |
else { /* Efron */
|
|
| 437 |
/* |
|
| 438 |
** If there are 3 deaths we have 3 terms: in the first the |
|
| 439 |
** three deaths are all in, in the second they are 2/3 |
|
| 440 |
** in the sums, and in the last 1/3 in the sum. Let k go |
|
| 441 |
** from 0 to (ndead -1), then we will sequentially use |
|
| 442 |
** denom - (k/ndead)*efronwt as the denominator |
|
| 443 |
** a - (k/ndead)*a2 as the "a" term |
|
| 444 |
** cmat - (k/ndead)*cmat2 as the "cmat" term |
|
| 445 |
** and reprise the equations just above. |
|
| 446 |
*/ |
|
| 447 | 5659147x |
for (k=0; k<ndead; k++) {
|
| 448 | 3219290x |
temp = (double)k/ ndead; |
| 449 | 3219290x |
wtave = deadwt/ndead; |
| 450 | 3219290x |
d2 = denom - temp*efronwt; |
| 451 | 3219290x |
results.loglik[1] -= wtave* log(d2); |
| 452 | 13376654x |
for (i=0; i<nvar; i++) {
|
| 453 | 20314728x |
temp2 = (a[i] - temp*a2[i])/ d2; |
| 454 | 10157364x |
results.u[i] -= wtave *temp2; |
| 455 | 34594626x |
for (j=0; j<=i; j++) |
| 456 | 24437262x |
imat[j][i] += (wtave/d2) * |
| 457 | 24437262x |
((cmat[i][j] - temp*cmat2[i][j]) - |
| 458 | 73311786x |
temp2*(a[j]-temp*a2[j])); |
| 459 |
} |
|
| 460 |
} |
|
| 461 | ||
| 462 | 10137712x |
for (i=0; i<nvar; i++) {
|
| 463 | 7697855x |
a2[i]=0; |
| 464 | 37039158x |
for (j=0; j<nvar; j++) cmat2[i][j]=0; |
| 465 |
} |
|
| 466 |
} |
|
| 467 |
} |
|
| 468 |
} /* end of accumulation loop */ |
|
| 469 | 25160x |
results.loglik[0] = results.loglik[1]; /* save the loglik for iter 0 */ |
| 470 | ||
| 471 |
/* am I done? |
|
| 472 |
** update the betas and test for convergence |
|
| 473 |
*/ |
|
| 474 | 104533x |
for (i=0; i<nvar; i++) /*use 'a' as a temp to save u0, for the score test*/ |
| 475 | 238119x |
a[i] = results.u[i]; |
| 476 | ||
| 477 | 25160x |
results.flag= cholesky2(imat, nvar, toler); |
| 478 | 25160x |
chsolve2(imat,nvar,a); /* a replaced by a *inverse(i) */ |
| 479 | ||
| 480 | ||
| 481 |
/* |
|
| 482 |
** Never, never complain about convergence on the first step. That way, |
|
| 483 |
** if someone HAS to they can force one iter at a time. |
|
| 484 |
*/ |
|
| 485 | 104533x |
for (i=0; i<nvar; i++) {
|
| 486 | 317492x |
newbeta[i] = results.coefs[i] + a[i]; |
| 487 |
} |
|
| 488 | 25160x |
if (maxiter==0) {
|
| 489 | ! |
chinv2(imat,nvar); |
| 490 | ! |
for (i=0; i<nvar; i++) {
|
| 491 | ! |
results.coefs[i] *= scale[i]; /*return to original scale */ |
| 492 | ! |
results.u[i] /= scale[i]; |
| 493 | ! |
imat[i][i] *= scale[i]*scale[i]; |
| 494 | ! |
for (j=0; j<i; j++) {
|
| 495 | ! |
imat[j][i] *= scale[i]*scale[j]; |
| 496 | ! |
imat[i][j] = imat[j][i]; |
| 497 |
} |
|
| 498 |
} |
|
| 499 | ! |
return 0; |
| 500 |
} |
|
| 501 | ||
| 502 |
/* |
|
| 503 |
** here is the main loop |
|
| 504 |
*/ |
|
| 505 | 25160x |
halving =0 ; /* =1 when in the midst of "step halving" */ |
| 506 | 103955x |
for (iter=1; iter<= maxiter; iter++) {
|
| 507 | 103955x |
newlk =0; |
| 508 | 433330x |
for (i=0; i<nvar; i++) {
|
| 509 | 329375x |
results.u[i] =0; |
| 510 | 1577804x |
for (j=0; j<nvar; j++) |
| 511 | 1248429x |
imat[i][j] =0; |
| 512 |
} |
|
| 513 | ||
| 514 |
/* |
|
| 515 |
** The data is sorted from smallest time to largest |
|
| 516 |
** Start at the largest time, accumulating the risk set 1 by 1 |
|
| 517 |
*/ |
|
| 518 | 10603342x |
for (person=nused-1; person>=0; ) {
|
| 519 | 10499387x |
if (strata[person] == 1) { /* rezero temps for each strata */
|
| 520 | 103955x |
denom = 0; |
| 521 | 103955x |
nrisk =0; |
| 522 | 433330x |
for (i=0; i<nvar; i++) {
|
| 523 | 329375x |
a[i] = 0; |
| 524 | 1577804x |
for (j=0; j<nvar; j++) cmat[i][j] = 0; |
| 525 |
} |
|
| 526 |
} |
|
| 527 | ||
| 528 | 10499387x |
dtime = survTimes[person]; |
| 529 | 10499387x |
deadwt =0; |
| 530 | 10499387x |
ndead =0; |
| 531 | 10499387x |
efronwt =0; |
| 532 | 49373457x |
while(person>=0 && survTimes[person]==dtime) {
|
| 533 | 14239319x |
nrisk++; |
| 534 | 14239319x |
zbeta = offsets[person]; |
| 535 | 59358662x |
for (i=0; i<nvar; i++) |
| 536 | 135358029x |
zbeta += newbeta[i]*X(person, i); |
| 537 | 14239319x |
zbeta = coxsafe(zbeta); |
| 538 | 14239319x |
risk = exp(zbeta) * weights[person]; |
| 539 | 14239319x |
denom += risk; |
| 540 | ||
| 541 | 59358662x |
for (i=0; i<nvar; i++) {
|
| 542 | 90238686x |
a[i] += risk*X(person, i); |
| 543 | 153191369x |
for (j=0; j<=i; j++) |
| 544 | 324216078x |
cmat[i][j] += risk*X(person, i)*X(person, j); |
| 545 |
} |
|
| 546 | ||
| 547 | 14239319x |
if (censInd[person]==1) {
|
| 548 | 13301480x |
ndead++; |
| 549 | 13301480x |
deadwt += weights[person]; |
| 550 | 13301480x |
newlk += weights[person] *zbeta; |
| 551 | 55451960x |
for (i=0; i<nvar; i++) |
| 552 | 168601920x |
results.u[i] += weights[person] *X(person, i); |
| 553 | 13301480x |
if (method==1) { /* Efron */
|
| 554 | 13301480x |
efronwt += risk; |
| 555 | 55451960x |
for (i=0; i<nvar; i++) {
|
| 556 | 84300960x |
a2[i] += risk*X(person, i); |
| 557 | 143115656x |
for (j=0; j<=i; j++) |
| 558 | 302895528x |
cmat2[i][j] += risk*X(person, i)*X(person, j); |
| 559 |
} |
|
| 560 |
} |
|
| 561 |
} |
|
| 562 | ||
| 563 | 14239319x |
person--; |
| 564 |
if (strata[person]==1) break; /*tied times don't cross strata*/ |
|
| 565 |
} |
|
| 566 | ||
| 567 | 10499387x |
if (ndead >0) { /* add up terms*/
|
| 568 | 10080983x |
if (method==0) { /* Breslow */
|
| 569 | ! |
newlk -= deadwt* log(denom); |
| 570 | ! |
for (i=0; i<nvar; i++) {
|
| 571 | ! |
temp2= a[i]/ denom; /* mean */ |
| 572 | ! |
results.u[i] -= deadwt* temp2; |
| 573 | ! |
for (j=0; j<=i; j++) |
| 574 | ! |
imat[j][i] += (deadwt/denom)* |
| 575 | ! |
(cmat[i][j] - temp2*a[j]); |
| 576 |
} |
|
| 577 |
} |
|
| 578 |
else { /* Efron */
|
|
| 579 | 23382463x |
for (k=0; k<ndead; k++) {
|
| 580 | 13301480x |
temp = (double)k / ndead; |
| 581 | 13301480x |
wtave= deadwt/ ndead; |
| 582 | 13301480x |
d2= denom - temp* efronwt; |
| 583 | 13301480x |
newlk -= wtave* log(d2); |
| 584 | 55451960x |
for (i=0; i<nvar; i++) {
|
| 585 | 84300960x |
temp2 = (a[i] - temp*a2[i])/ d2; |
| 586 | 42150480x |
results.u[i] -= wtave*temp2; |
| 587 | 143115656x |
for (j=0; j<=i; j++) |
| 588 | 100965176x |
imat[j][i] += (wtave/d2)* |
| 589 | 100965176x |
((cmat[i][j] - temp*cmat2[i][j]) - |
| 590 | 302895528x |
temp2*(a[j]-temp*a2[j])); |
| 591 |
} |
|
| 592 |
} |
|
| 593 | ||
| 594 | 42025054x |
for (i=0; i<nvar; i++) { /*in anticipation */
|
| 595 | 31944071x |
a2[i] =0; |
| 596 | 153031076x |
for (j=0; j<nvar; j++) cmat2[i][j] =0; |
| 597 |
} |
|
| 598 |
} |
|
| 599 |
} |
|
| 600 |
} /* end of accumulation loop */ |
|
| 601 | ||
| 602 |
/* am I done? |
|
| 603 |
** update the betas and test for convergence |
|
| 604 |
*/ |
|
| 605 | 103955x |
results.flag = cholesky2(imat, nvar, toler); |
| 606 | ||
| 607 | 103955x |
if (fabs(1-(results.loglik[1]/newlk))<= eps && halving==0) { /* all done */
|
| 608 | 25160x |
results.loglik[1] = newlk; |
| 609 | 25160x |
chinv2(imat, nvar); /* invert the information matrix */ |
| 610 | 104533x |
for (i=0; i<nvar; i++) {
|
| 611 | 238119x |
results.coefs[i] = newbeta[i]*scale[i]; |
| 612 | 158746x |
results.u[i] /= scale[i]; |
| 613 | 238119x |
imat[i][i] *= scale[i]*scale[i]; |
| 614 | 190944x |
for (j=0; j<i; j++) {
|
| 615 | 334713x |
imat[j][i] *= scale[i]*scale[j]; |
| 616 | 111571x |
imat[i][j] = imat[j][i]; |
| 617 |
} |
|
| 618 |
} |
|
| 619 | 25160x |
return iter; |
| 620 |
} |
|
| 621 | ||
| 622 |
if (iter== maxiter) break; /*skip the step halving calc*/ |
|
| 623 | ||
| 624 | 78795x |
if (newlk < results.loglik[1]) { /*it is not converging ! */
|
| 625 | 1666x |
halving =1; |
| 626 | 5576x |
for (i=0; i<nvar; i++) |
| 627 | 15640x |
newbeta[i] = (newbeta[i] + results.coefs[i]) /2; /*half of old increment */ |
| 628 |
} |
|
| 629 |
else {
|
|
| 630 | 77129x |
halving=0; |
| 631 | 77129x |
results.loglik[1] = newlk; |
| 632 | 77129x |
chsolve2(imat,nvar,results.u); |
| 633 | 77129x |
j=0; |
| 634 | 323221x |
for (i=0; i<nvar; i++) {
|
| 635 | 492184x |
results.coefs[i] = newbeta[i]; |
| 636 | 984368x |
newbeta[i] = newbeta[i] + results.u[i]; |
| 637 |
} |
|
| 638 |
} |
|
| 639 |
} /* return for another iteration */ |
|
| 640 | ||
| 641 |
/* |
|
| 642 |
** We end up here only if we ran out of iterations |
|
| 643 |
*/ |
|
| 644 | ! |
results.loglik[1] = newlk; |
| 645 | ! |
chinv2(imat, nvar); |
| 646 | ! |
for (i=0; i<nvar; i++) {
|
| 647 | ! |
results.coefs[i] = newbeta[i]*scale[i]; |
| 648 | ! |
results.u[i] /= scale[i]; |
| 649 | ! |
imat[i][i] *= scale[i]*scale[i]; |
| 650 | ! |
for (j=0; j<i; j++) {
|
| 651 | ! |
imat[j][i] *= scale[i]*scale[j]; |
| 652 | ! |
imat[i][j] = imat[j][i]; |
| 653 |
} |
|
| 654 |
} |
|
| 655 | ! |
results.flag = 1000; |
| 656 | ||
| 657 | ! |
return iter; |
| 658 |
} |
|
| 659 | ||
| 660 | ||
| 661 |
// ***************************************************************************************************// |
|
| 662 | ||
| 663 |
// compute the residual deviance of this model |
|
| 664 |
double |
|
| 665 | 24463x |
Coxfit::computeResidualDeviance() |
| 666 |
{
|
|
| 667 |
// do the fitting |
|
| 668 | 24463x |
fit(); |
| 669 | ||
| 670 |
// get results: need to do that first! |
|
| 671 | 24463x |
CoxfitResults fit = finalizeAndGetResults(); |
| 672 | ||
| 673 |
// check results |
|
| 674 | 24463x |
checkResults(); |
| 675 | ||
| 676 |
// return residual deviance |
|
| 677 | 24463x |
double ret = - 2.0 * (fit.loglik[0] - fit.loglik[1]); |
| 678 | 24463x |
return ret; |
| 679 |
} |
|
| 680 | ||
| 681 |
// ***************************************************************************************************// |
|
| 682 | ||
| 683 |
// 03/07/2013: add offsets |
|
| 684 | ||
| 685 |
// just an R interface to the coxfit routine, for regression testing purposes. |
|
| 686 |
// [[Rcpp::export]] |
|
| 687 |
SEXP |
|
| 688 | 17x |
cpp_coxfit(SEXP R_survTimes, SEXP R_censInd, SEXP R_offsets, SEXP R_X, SEXP R_method) |
| 689 |
{
|
|
| 690 |
// --------------- |
|
| 691 |
// get R objects: |
|
| 692 | ||
| 693 |
// // extract survival times |
|
| 694 |
// R_interface = CDR(R_interface); |
|
| 695 |
// SEXP R_survTimes = CAR(R_interface); |
|
| 696 |
// |
|
| 697 |
// // censoring status |
|
| 698 |
// R_interface = CDR(R_interface); |
|
| 699 |
// SEXP R_censInd = CAR(R_interface); |
|
| 700 |
// |
|
| 701 |
// // offsets |
|
| 702 |
// R_interface = CDR(R_interface); |
|
| 703 |
// SEXP R_offsets = CAR(R_interface); |
|
| 704 |
// |
|
| 705 |
// // design matrix |
|
| 706 |
// R_interface = CDR(R_interface); |
|
| 707 |
// SEXP R_X= CAR(R_interface); |
|
| 708 |
// |
|
| 709 |
// // and the tie method |
|
| 710 |
// R_interface = CDR(R_interface); |
|
| 711 |
// SEXP R_method = CAR(R_interface); |
|
| 712 | ||
| 713 |
// --------------- |
|
| 714 |
// unpack R objects: |
|
| 715 | ||
| 716 |
// survival times |
|
| 717 | 17x |
const NumericVector n_survTimes = R_survTimes; |
| 718 |
//const AVector survTimes(n_survTimes.begin(), n_survTimes.size(), |
|
| 719 |
// false); |
|
| 720 |
|
|
| 721 |
// errors with const and stuff... what if we copy into new memory? |
|
| 722 | 17x |
const AVector survTimes(n_survTimes.begin(), n_survTimes.size()); |
| 723 | ||
| 724 |
// censoring status |
|
| 725 | 17x |
const IntVector censInd = as<IntVector>(R_censInd); |
| 726 | ||
| 727 |
// offsets |
|
| 728 | 17x |
const AVector offsets = as<NumericVector>(R_offsets); |
| 729 | ||
| 730 |
// design matrix |
|
| 731 | 17x |
const NumericMatrix n_X = R_X; |
| 732 |
//const AMatrix X(n_X.begin(), n_X.nrow(), |
|
| 733 |
// n_X.ncol(), false); |
|
| 734 |
|
|
| 735 |
//Same issue as above L717:721 |
|
| 736 | 17x |
const AMatrix X(n_X.begin(), n_X.nrow(), |
| 737 | 17x |
n_X.ncol()); |
| 738 |
|
|
| 739 | ||
| 740 |
// tie method |
|
| 741 | 17x |
const int method = as<int>(R_method); |
| 742 | ||
| 743 |
// --------------- |
|
| 744 |
// assign remaining arguments for Coxfit |
|
| 745 | ||
| 746 | 17x |
const int nObs = survTimes.size(); |
| 747 | ||
| 748 | 34x |
const AVector weights = arma::ones<AVector>(nObs); |
| 749 | ||
| 750 |
// --------------- |
|
| 751 | ||
| 752 |
// get new Coxfit object |
|
| 753 |
Coxfit cox(survTimes, |
|
| 754 |
censInd, |
|
| 755 |
X, |
|
| 756 |
weights, |
|
| 757 |
offsets, |
|
| 758 | 17x |
method); |
| 759 | ||
| 760 |
// do the fitting |
|
| 761 | 17x |
const int nIter = cox.fit(); |
| 762 | ||
| 763 |
// get results: need to do that first! |
|
| 764 | 17x |
CoxfitResults fit = cox.finalizeAndGetResults(); |
| 765 | ||
| 766 |
// check results |
|
| 767 | 17x |
cox.checkResults(); |
| 768 | ||
| 769 |
// pack results into R list and return that |
|
| 770 | 34x |
return List::create(_["coef"] = fit.coefs, |
| 771 | 34x |
_["imat"] = fit.imat, |
| 772 | 34x |
_["loglik"] = fit.loglik, |
| 773 | 68x |
_["nIter"] = nIter); |
| 774 | ||
| 775 |
} |
|
| 776 |
| 1 |
/* |
|
| 2 |
* coxfit.h |
|
| 3 |
* |
|
| 4 |
* Created on: 03.12.2012 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef COXFIT_H_ |
|
| 9 |
#define COXFIT_H_ |
|
| 10 | ||
| 11 |
#include "types.h" |
|
| 12 | ||
| 13 |
// Dynamic_2d_array class by David Maisonave (609-345-1007) (www.axter.com) |
|
| 14 |
// Description: |
|
| 15 |
// The dynamic array class listed below is more efficient then other |
|
| 16 |
// similar classes that use temporary objects as return types, or use |
|
| 17 |
// an std::vector as a return type. |
|
| 18 |
// |
|
| 19 |
// It's also more compatible with a C style 2D array, in that the |
|
| 20 |
// array is in one continuous memory block. This makes it possible |
|
| 21 |
// to pass this array object to a C Function that has a C-Style |
|
| 22 |
// 2D array for a parameter. |
|
| 23 |
// Example usage: |
|
| 24 |
/* |
|
| 25 | ||
| 26 |
Dynamic_2d_array<int> MyIntArray(12, 34); |
|
| 27 |
MyIntArray[0][1] = 123; |
|
| 28 |
cout << MyIntArray[0][1] << endl; |
|
| 29 | ||
| 30 |
*/ |
|
| 31 | ||
| 32 |
template < class T > |
|
| 33 |
class Dynamic_2d_array |
|
| 34 |
{
|
|
| 35 |
public: |
|
| 36 |
// constructor |
|
| 37 | 17760x |
Dynamic_2d_array(int row, int col) : |
| 38 | 17760x |
m_row(row), |
| 39 | 17760x |
m_col(col), |
| 40 | ! |
m_data((row!=0 && col!=0) ? new T[row*col] : NULL) |
| 41 |
{}
|
|
| 42 | ||
| 43 |
// copy ctr |
|
| 44 |
Dynamic_2d_array(const Dynamic_2d_array& src) : |
|
| 45 |
m_row(src.m_row), |
|
| 46 |
m_col(src.m_col), |
|
| 47 |
m_data((src.m_row!=0 && src.m_col!=0) ? new T[src.m_row*src.m_col] : NULL) |
|
| 48 |
{
|
|
| 49 |
for(int r=0; r<m_row; ++r) |
|
| 50 |
for(int c=0; c<m_col; ++c) |
|
| 51 |
(*this)[r][c] = src[r][c]; |
|
| 52 |
} |
|
| 53 | ||
| 54 |
// destructor |
|
| 55 | 17760x |
~Dynamic_2d_array() |
| 56 |
{
|
|
| 57 | 17760x |
delete[] m_data; |
| 58 |
} |
|
| 59 | ||
| 60 |
// non-const access |
|
| 61 | 187508584x |
inline T* operator[](int i) |
| 62 |
{
|
|
| 63 | 187508584x |
return (m_data + (m_col*i)); |
| 64 |
} |
|
| 65 | ||
| 66 |
// const access |
|
| 67 | 367984x |
inline T const*const operator[](int i) const |
| 68 |
{
|
|
| 69 | 367984x |
return (m_data + (m_col*i)); |
| 70 |
} |
|
| 71 | ||
| 72 |
private: |
|
| 73 |
const int m_row; |
|
| 74 |
const int m_col; |
|
| 75 |
T* m_data; |
|
| 76 |
}; |
|
| 77 | ||
| 78 |
// Note that the class Dynamic_2d_array automatically allocates and |
|
| 79 |
// deallocates the memory. |
|
| 80 |
typedef Dynamic_2d_array<long> LongMatrix; |
|
| 81 |
typedef Dynamic_2d_array<double> DoubleMatrix; |
|
| 82 |
typedef Dynamic_2d_array<int> IntMatrix; |
|
| 83 | ||
| 84 |
// ************************************************************************************** |
|
| 85 | ||
| 86 | ||
| 87 |
struct CoxfitResults |
|
| 88 |
{
|
|
| 89 |
// constructor: |
|
| 90 | 5920x |
CoxfitResults(const int nCovs) : |
| 91 | 5920x |
coefs(nCovs), |
| 92 | 5920x |
imat(nCovs, nCovs), |
| 93 | 5920x |
u(nCovs), |
| 94 | 5920x |
loglik(2), |
| 95 | 5920x |
flag(0) |
| 96 |
{
|
|
| 97 |
// fill with initial values for the null model: |
|
| 98 | 5920x |
coefs.fill(0.0); |
| 99 |
} |
|
| 100 | ||
| 101 | ||
| 102 |
// the coefficient vector |
|
| 103 |
AVector coefs; |
|
| 104 | ||
| 105 |
// the covariance matrix estimate |
|
| 106 |
AMatrix imat; |
|
| 107 | ||
| 108 |
// the score vector |
|
| 109 |
AVector u; |
|
| 110 | ||
| 111 |
// the log likelihoods of the initial coef vector and |
|
| 112 |
// the final one |
|
| 113 |
MyDoubleVector loglik; |
|
| 114 | ||
| 115 |
// convergence flag |
|
| 116 |
int flag; |
|
| 117 |
}; |
|
| 118 | ||
| 119 | ||
| 120 |
// ************************************************************************************** |
|
| 121 | ||
| 122 | ||
| 123 |
// The class which does the Cox model fitting |
|
| 124 |
class Coxfit {
|
|
| 125 | ||
| 126 |
public: |
|
| 127 | ||
| 128 |
// ctr |
|
| 129 | 5920x |
Coxfit(const AVector& survTimes, |
| 130 |
const IntVector& censInd, |
|
| 131 |
const AMatrix& X, |
|
| 132 |
const AVector& weights, |
|
| 133 |
const AVector& offsets, |
|
| 134 |
const int method, |
|
| 135 |
double eps = 1e-09, |
|
| 136 |
double tolerChol = pow(DBL_EPSILON, 0.75), |
|
| 137 |
int iterMax = 40, |
|
| 138 | 5920x |
double tolerInf = 1e-05) : |
| 139 | 5920x |
survTimes(survTimes), |
| 140 | 5920x |
censInd(censInd), |
| 141 | 5920x |
X(X), |
| 142 | 5920x |
weights(weights), |
| 143 | 5920x |
offsets(offsets), |
| 144 | 5920x |
method(method), |
| 145 | 5920x |
nObs(survTimes.size()), |
| 146 | 5920x |
nCovs(X.n_cols), |
| 147 | 5920x |
strata(nObs, 0), |
| 148 | 5920x |
imat(nCovs, nCovs), |
| 149 | 5920x |
results(nCovs), |
| 150 | 5920x |
eps(eps), |
| 151 | 5920x |
tolerChol(tolerChol), |
| 152 | 5920x |
iterMax(iterMax), |
| 153 | 5920x |
tolerInf(tolerInf) |
| 154 |
{}
|
|
| 155 | ||
| 156 |
// getter for results |
|
| 157 |
CoxfitResults |
|
| 158 |
finalizeAndGetResults(); |
|
| 159 | ||
| 160 |
// check results |
|
| 161 |
void |
|
| 162 |
checkResults() const; |
|
| 163 | ||
| 164 |
// the fit function, returns the number of |
|
| 165 |
// required iterations |
|
| 166 |
int |
|
| 167 |
fit(); |
|
| 168 | ||
| 169 |
// compute the residual deviance of this model |
|
| 170 |
double |
|
| 171 |
computeResidualDeviance(); |
|
| 172 | ||
| 173 |
private: |
|
| 174 | ||
| 175 |
// inputs |
|
| 176 |
AVector survTimes; |
|
| 177 |
IntVector censInd; |
|
| 178 |
AMatrix X; |
|
| 179 |
AVector weights; |
|
| 180 |
AVector offsets; |
|
| 181 |
const int method; |
|
| 182 | ||
| 183 |
const int nObs; |
|
| 184 |
const int nCovs; |
|
| 185 | ||
| 186 |
IntVector strata; |
|
| 187 | ||
| 188 |
// temporary storage |
|
| 189 |
DoubleMatrix imat; |
|
| 190 | ||
| 191 |
// outputs |
|
| 192 |
CoxfitResults results; |
|
| 193 | ||
| 194 |
// numerical options |
|
| 195 |
const double eps; |
|
| 196 |
const double tolerChol; |
|
| 197 |
const int iterMax; |
|
| 198 |
const double tolerInf; |
|
| 199 |
}; |
|
| 200 | ||
| 201 |
#endif /* COXFIT_H_ */ |
| 1 |
#include <set> |
|
| 2 |
#include <algorithm> |
|
| 3 |
#include <cmath> |
|
| 4 |
#include <string> |
|
| 5 |
#include <sstream> |
|
| 6 | ||
| 7 |
#include "dataStructure.h" |
|
| 8 |
#include "sum.h" |
|
| 9 |
#include "functionWraps.h" |
|
| 10 |
#include "fpUcHandling.h" |
|
| 11 | ||
| 12 |
#include "rcppExport.h" |
|
| 13 | ||
| 14 |
using std::lexicographical_compare; |
|
| 15 |
using std::accumulate; |
|
| 16 |
using std::exp; |
|
| 17 |
using std::log; |
|
| 18 | ||
| 19 |
using namespace Rcpp; |
|
| 20 | ||
| 21 |
// ***************************************************************************************************// |
|
| 22 | ||
| 23 |
// safeSum // |
|
| 24 | ||
| 25 | 543200x |
void SafeSum::add(const long double &val) |
| 26 |
{
|
|
| 27 | 543200x |
vals.push_back(val); |
| 28 |
} |
|
| 29 | ||
| 30 | 3456x |
long double SafeSum::sum() |
| 31 |
{
|
|
| 32 | 3456x |
long double ret = modified_deflation(vals); |
| 33 | 3456x |
return ret; |
| 34 |
} |
|
| 35 | ||
| 36 |
// compute the log of the sum of the exp of the elements using accurate algorithm, |
|
| 37 |
// and avoiding infinite contributions. |
|
| 38 | 22080x |
long double SafeSum::logSumExp() |
| 39 |
{
|
|
| 40 |
// be sure that there is at least 1 value in "vals" |
|
| 41 | 22080x |
if(vals.empty()) |
| 42 |
{
|
|
| 43 | ! |
return R_NaN; |
| 44 |
} |
|
| 45 | ||
| 46 |
// the maximum of the log contributions is: |
|
| 47 | 22080x |
long double maxLogContrib = *std::max_element(vals.begin(), vals.end()); |
| 48 | ||
| 49 |
// now compute the constant which is added to all log contributions, |
|
| 50 |
// in order to avoid infinite contributions and at the same time use |
|
| 51 |
// the whole number space (i.e. possibly avoid zero contributions) |
|
| 52 | 22080x |
long double constant = log(LDBL_MAX) - 100.0L - maxLogContrib; |
| 53 |
// 100 is for safety. |
|
| 54 | ||
| 55 |
// so now the contributions, offset by the constant |
|
| 56 | 22080x |
LongDoubleVector expVals; |
| 57 | 22080x |
for(LongDoubleVector::const_iterator |
| 58 | 22080x |
l = vals.begin(); |
| 59 | 484192x |
l != vals.end(); |
| 60 | 462112x |
++l) |
| 61 |
{
|
|
| 62 | 462112x |
expVals.push_back(exp(*l + constant)); |
| 63 |
} |
|
| 64 | ||
| 65 |
// the result is the log of the sum, corrected with the constant: |
|
| 66 | 22080x |
long double ret = log(modified_deflation(expVals)) - constant; |
| 67 | 22080x |
return ret; |
| 68 |
} |
|
| 69 | ||
| 70 | ! |
long double SafeSum::simpleSum() |
| 71 |
{
|
|
| 72 | ! |
long double ret = 0.0; |
| 73 | ! |
for(LongDoubleVector::const_iterator |
| 74 | ! |
v = vals.begin(); |
| 75 | ! |
v != vals.end(); |
| 76 | ! |
++v) |
| 77 |
{
|
|
| 78 | ! |
ret += *v; |
| 79 |
} |
|
| 80 | ! |
return ret; |
| 81 |
} |
|
| 82 | ||
| 83 |
// ***************************************************************************************************// |
|
| 84 | ||
| 85 |
// indexSafeSum // |
|
| 86 | ||
| 87 | 3072x |
void IndexSafeSum::add(const Ind& ind) |
| 88 |
{
|
|
| 89 | 3072x |
indices.insert(ind); |
| 90 |
} |
|
| 91 | ||
| 92 |
long double |
|
| 93 | 96x |
IndexSafeSum::sumNormalizedExp(const SafeSum& s, long double logNormConst) const |
| 94 |
{
|
|
| 95 | 96x |
LongDoubleVector tempVec; |
| 96 | 3168x |
for (IndSet::const_iterator i = indices.begin(); i != indices.end(); i++) |
| 97 |
{
|
|
| 98 | 3072x |
tempVec.push_back(exp(s.vals.at(* i) - logNormConst)); |
| 99 |
} |
|
| 100 | 192x |
return modified_deflation(tempVec); |
| 101 |
} |
|
| 102 | ||
| 103 |
// ***************************************************************************************************// |
|
| 104 | ||
| 105 | ||
| 106 |
// Book // |
|
| 107 | ||
| 108 |
// 21/11/2012: add tbf option |
|
| 109 | ||
| 110 |
// constructor which checks the chainlength |
|
| 111 | 256x |
Book::Book(bool tbf, |
| 112 |
bool doGlm, |
|
| 113 |
bool empiricalBayes, |
|
| 114 |
bool useFixedg, |
|
| 115 |
bool useFixedc, |
|
| 116 |
double cl, |
|
| 117 |
bool doSampling, |
|
| 118 |
bool verbose, |
|
| 119 |
std::string modelPrior, |
|
| 120 |
PosInt nModels, |
|
| 121 |
PosInt nCache, |
|
| 122 |
double largeVariance, |
|
| 123 |
bool useBfgs, |
|
| 124 |
bool debug, |
|
| 125 | 256x |
bool higherOrderCorrection) : |
| 126 | 256x |
modelCounter(0), |
| 127 | 256x |
nanCounter(0), |
| 128 | 256x |
tbf(tbf), |
| 129 | 256x |
doGlm(doGlm), |
| 130 | 256x |
empiricalBayes(empiricalBayes), |
| 131 | 256x |
useFixedg(useFixedg), |
| 132 | 256x |
useFixedc(useFixedc), |
| 133 | 256x |
doSampling(doSampling), |
| 134 | 256x |
verbose(verbose), |
| 135 | 256x |
modelPrior(modelPrior), |
| 136 | 256x |
nModels(nModels), |
| 137 | 256x |
nCache(nCache), |
| 138 | 256x |
largeVariance(largeVariance), |
| 139 | 256x |
useBfgs(useBfgs), |
| 140 | 256x |
debug(debug), |
| 141 | 256x |
higherOrderCorrection(higherOrderCorrection) |
| 142 |
{
|
|
| 143 | 256x |
if (doSampling) |
| 144 |
{
|
|
| 145 |
// check if chainlength is too large |
|
| 146 | 224x |
if (ULLONG_MAX < cl) |
| 147 |
{
|
|
| 148 | ! |
Rf_warning("\nchainlength too high - reducing to %llu\n", ULLONG_MAX);
|
| 149 | ! |
chainlength = ULONG_MAX; |
| 150 |
} |
|
| 151 |
else |
|
| 152 |
{
|
|
| 153 | 224x |
chainlength = static_cast<PosLargeInt> (cl); |
| 154 |
} |
|
| 155 |
} |
|
| 156 |
else |
|
| 157 |
{
|
|
| 158 |
// we will do an exhaustive model search, so initialize chainlength |
|
| 159 |
// with a non-usable number: |
|
| 160 | 32x |
chainlength = 0; |
| 161 |
} |
|
| 162 |
} |
|
| 163 | ||
| 164 | ||
| 165 |
// ***************************************************************************************************// |
|
| 166 | ||
| 167 |
// GlmModelInfo // |
|
| 168 | ||
| 169 |
// convert to Rcpp list |
|
| 170 |
List |
|
| 171 | 6608x |
GlmModelInfo::convert2list(long double logNormConst, |
| 172 |
const Book& bookkeep) const |
|
| 173 |
{
|
|
| 174 | 13216x |
return List::create(_["logMargLik"] = logMargLik, |
| 175 | 13216x |
_["logPrior"] = logPrior, |
| 176 | 6608x |
_["posterior"] = NumericVector::create(exp(logPost - logNormConst), |
| 177 | 19824x |
bookkeep.doSampling ? (hits * 1.0 / bookkeep.chainlength) : NA_REAL), |
| 178 | 13216x |
_["negLogUnnormZDensities"] = negLogUnnormZDensities.convert2list(), |
| 179 | 13216x |
_["zMode"] = zMode, |
| 180 | 13216x |
_["zVar"] = zVar, |
| 181 | 13216x |
_["laplaceApprox"] = laplaceApprox, |
| 182 | 19824x |
_["residualDeviance"] = residualDeviance); |
| 183 |
} |
|
| 184 | ||
| 185 | ||
| 186 |
// ***************************************************************************************************// |
|
| 187 | ||
| 188 | 12037072x |
bool ModelPar::operator<(const ModelPar& m) const |
| 189 |
{
|
|
| 190 |
// return size() < m.size(); way too easy... lexicographical comparison, starting with uc indices: |
|
| 191 | 12037072x |
if (ucPars < m.ucPars) |
| 192 | 4401936x |
return true; |
| 193 | 7635136x |
else if (ucPars > m.ucPars) |
| 194 | 2085856x |
return false; |
| 195 |
else // uc indices are equal |
|
| 196 | 5549280x |
return lexicographical_compare(fpPars.begin(), fpPars.end(), m.fpPars.begin(), m.fpPars.end()); |
| 197 |
} |
|
| 198 | ||
| 199 |
// ***************************************************************************************************// |
|
| 200 | ||
| 201 |
// return a textual description of this model configuration |
|
| 202 |
// Return a human-readable textual description of this model configuration:
// the number of FP powers and UC groups, the powers included for each FP
// covariate (by name), and the indexes of included UC and fixed groups.
//
// fpInfo supplies the covariate names (fpnames, parallel to fpPars) and the
// index-to-power translation (inds2powers).
std::string
ModelPar::print(const FpInfo& fpInfo) const
{
    // put everything into a stringstream, and return the string conversion at the end.
    std::ostringstream stream;

    // ask for two significant digits
    stream.precision(2);

    // Start
    stream << "\nmodel with " << fpSize << " FP powers and " << ucPars.size() << " UC groups.\n\nIncluded FP powers:";

    if(fpSize > 0)
    {
        // in parallel to the power index vector, go along the names vector
        StrVector::const_iterator n = fpInfo.fpnames.begin();
        for(PowersVector::const_iterator
                i = fpPars.begin();
            i != fpPars.end();
            ++i, ++n)
        {
            // if there is at least one power for this covariate
            if(i->size() > 0)
            {
                // print the name
                stream << "\nFor " << *n << ": ";

                // and all powers (translated back from index form)
                MyDoubleVector thesePowers = fpInfo.inds2powers(*i);
                for(MyDoubleVector::const_iterator
                        p = thesePowers.begin();
                    p != thesePowers.end();
                    ++p)
                {
                    stream << *p << " ";
                }
            }
        }
    }
    else
    {
        stream << "\nnone";
    }

    // the included UC group indexes
    stream << "\n\nIndexes of included UC groups:";

    if(ucPars.size() > 0)
    {
        for(IntSet::const_iterator
                i = ucPars.begin();
            i != ucPars.end();
            ++i)
        {
            stream << " " << *i;
        }
    }
    else
    {
        stream << " none";
    }

    // the included fixed group indexes
    stream << "\n\nIndexes of included fixed groups:";

    if(fixPars.size() > 0)
    {
        for(IntSet::const_iterator
                i = fixPars.begin();
            i != fixPars.end();
            ++i)
        {
            stream << " " << *i;
        }
    }
    else
    {
        stream << " none";
    }

    // return the resulting string
    return stream.str();
}
|
| 284 | ||
| 285 |
// ***************************************************************************************************// |
|
| 286 | ||
| 287 |
// convert R list to ModelPar |
|
| 288 | 672x |
// Construct a ModelPar from an R list model configuration.
//
// rcpp_configuration: R list with elements "ucTerms", "fixTerms" (integer
//                     sets) and "powers" (named list of double vectors).
// fpInfo:             FP bookkeeping; its fpnames determine the order of
//                     fpPars and vec2inds translates powers to index form.
ModelPar::ModelPar(List rcpp_configuration,
                   const FpInfo& fpInfo) :
    fpSize(0),
    ucPars(as<IntSet>(rcpp_configuration["ucTerms"])),
    fixPars(as<IntSet>(rcpp_configuration["fixTerms"]))
{
    // get the list with the FP power vectors:
    List rcpp_powers = rcpp_configuration["powers"];

    // now process all fp names saved in the fpInfo, to construct the correct PowersVector
    for(StrVector::const_iterator s = fpInfo.fpnames.begin();
        s != fpInfo.fpnames.end();
        ++s)
    {
        // get the list element with this name (a double vector)
        // and get the Powers object from that (that is, convert double vector to indexes)
        Powers powers(fpInfo.vec2inds(as<MyDoubleVector>(rcpp_powers[*s])));

        // we have started with fpSize = 0, so we can just add the number of powers now
        fpSize += powers.size();

        // save the powers at the right place
        fpPars.push_back(powers);
    }

    // now we are sure that the PowersVector fpPars is parallel to the fpInfo stuff.
}
|
| 316 | ||
| 317 | ||
| 318 |
// ***************************************************************************************************// |
|
| 319 | ||
| 320 |
// Export this model configuration as an R list with elements "ucTerms",
// "powers" (named list of power vectors, parallel to currFp.fpnames) and
// "fixTerms".
List
ModelPar::convert2list(const FpInfo& currFp) const
{
    // translate each FP index vector back into the actual powers
    List powers(fpPars.size());
    powers.names() = currFp.fpnames;

    for (PosInt k = 0; k != fpPars.size(); ++k)
    {
        powers[k] = currFp.inds2powers(fpPars[k]);
    }

    // assemble the configuration list
    return List::create(_["ucTerms"] = ucPars,
                        _["powers"] = powers,
                        _["fixTerms"] = fixPars);
}
|
| 337 | ||
| 338 | ||
| 339 |
// ***************************************************************************************************// |
|
| 340 | ||
| 341 |
// compute set of free uc group indices in a model configuration |
|
| 342 |
// Compute the set of free UC group indices in this model configuration:
// a group (1-based index i) is free when it is not already in the model
// and its size still fits into the remaining design-matrix space.
//
// ucSizes: number of columns of each UC group (0-based by group index - 1)
// currDim: current model dimension
// maxDim:  maximum allowed model dimension
IntSet
ModelPar::getFreeUcs(const PosIntVector& ucSizes,
                     const PosInt& currDim,
                     const PosInt& maxDim) const
{
    IntSet ret;

    for (PosIntVector::size_type i = 1; i <= ucSizes.size(); i++)
    { // for every uc index
        // member lookup on the ordered set is O(log n); the previous
        // linear std::find over the set was O(n) per group.
        if ((ucPars.count(i) == 0)
            && (ucSizes.at(i - 1) <= maxDim - currDim))
        {
            ret.insert(i); // insert if not already in model and enough space in design matrix
        }
    }

    return ret;
}
|
| 360 | ||
| 361 |
// ***************************************************************************************************// |
|
| 362 | ||
| 363 |
// compute set of free cov indices in a model configuration |
|
| 364 |
// Compute the set of free covariate indices in this model configuration.
// FP covariate i gets 1-based index i + 1 and is free when it has not yet
// reached its maximum number of powers; the UC terms collectively get index
// nFps + 1 and are free when at least one UC group is free (freeUcs).
// An empty set is returned when the model is already at maximum dimension.
PosIntSet
ModelPar::getFreeCovs(const FpInfo& currFp,
                      const IntSet& freeUcs,
                      const PosInt& currDim,
                      const PosInt& maxDim) const
{
    PosIntSet ret;

    // no space left in the design matrix at all?
    if (currDim == maxDim)
    {
        return ret;
    }

    // FP covariates that can still take another power
    for (PosInt i = 0; i != fpPars.size(); i++)
    {
        if (fpPars.at(i).size() < currFp.fpmaxs.at(i))
        {
            ret.insert(i + 1);
        }
    }

    // the UC block (index nFps + 1) is free iff some UC group is free
    if (! freeUcs.empty())
    {
        ret.insert(fpPars.size() + 1);
    }

    return ret;
}
|
| 392 | ||
| 393 |
// ***************************************************************************************************// |
|
| 394 | ||
| 395 |
// determine set of present cov indices |
|
| 396 |
// Determine the set of covariate indices present in this configuration:
// FP covariate j contributes 1-based index j + 1 when it has at least one
// power; the UC block contributes index nFps + 1 when any UC group is in.
PosIntSet
ModelPar::getPresentCovs() const
{
    PosIntSet present;

    // FP covariates with at least one power
    for (PosInt j = 0; j != fpPars.size(); ++j)
    {
        if (! fpPars.at(j).empty())
        {
            present.insert(j + 1);
        }
    }

    // the collective UC block
    if (! ucPars.empty())
    {
        present.insert(fpPars.size() + 1);
    }

    return present;
}
|
| 416 | ||
| 417 | ||
| 418 |
// ***************************************************************************************************// |
|
| 419 | ||
| 420 |
// push back index into covGroupWisePosteriors-Array |
|
| 421 |
void |
|
| 422 | 1024x |
ModelPar::pushInclusionProbs(const FpInfo& fpInfo, |
| 423 |
const UcInfo& ucInfo, |
|
| 424 |
Book& bookkeep) const |
|
| 425 |
{
|
|
| 426 |
for (PosInt i = 0; i != fpInfo.nFps; i++) |
|
| 427 |
{
|
|
| 428 | ! |
if (! fpPars.at(i).empty()) |
| 429 | ! |
bookkeep.covGroupWisePosteriors[i].add(bookkeep.modelCounter); |
| 430 |
} |
|
| 431 | ||
| 432 | 7168x |
for (PosInt i = 1; i <= ucInfo.nUcGroups; i++) |
| 433 |
{
|
|
| 434 |
// search for uc group i |
|
| 435 | 6144x |
IntSet::const_iterator ipos = find(ucPars.begin(), |
| 436 |
ucPars.end(), i); |
|
| 437 | 6144x |
if (ipos != ucPars.end()) |
| 438 |
{ // if mod.ucPars contains i
|
|
| 439 | 3072x |
bookkeep.covGroupWisePosteriors[i - 1 + fpInfo.nFps].add( |
| 440 | 3072x |
bookkeep.modelCounter); |
| 441 |
} |
|
| 442 |
} |
|
| 443 |
} |
|
| 444 | ||
| 445 | ||
| 446 | ||
| 447 |
// ***************************************************************************************************// |
|
| 448 | ||
| 449 |
// we do not need this function because two posteriors will never be exactly equal. |
|
| 450 |
// And if they happen to be equal, we do not care about the number of parameters. |
|
| 451 | ||
| 452 |
//bool Model::operator<(const Model& m) const // less |
|
| 453 |
//{
|
|
| 454 |
// double thisLogPost = info.logMargLik + info.logPrior; |
|
| 455 |
// double mLogPost = m.info.logMargLik + m.info.logPrior; |
|
| 456 |
// if (thisLogPost < mLogPost) |
|
| 457 |
// return true; |
|
| 458 |
// else if (thisLogPost > mLogPost) |
|
| 459 |
// return false; |
|
| 460 |
// else // posteriors are equal, then the parameter makes the decision |
|
| 461 |
// return m.par < par; |
|
| 462 |
//} |
|
| 463 | ||
| 464 |
// ***************************************************************************************************// |
|
| 465 | ||
| 466 |
// return the size of the model (excluding the intercept), |
|
| 467 |
// i.e. the number of coefficients |
|
| 468 |
// Return the size of the model (excluding the intercept), i.e. the total
// number of coefficients: FP powers count one coefficient each, while each
// included UC / fixed group contributes its own group size.
PosInt
ModelPar::size(const UcInfo& ucInfo, const FixInfo& fixInfo) const
{
    // start with the FP coefficients, which are simply counted by fpSize
    PosInt total = fpSize;

    // add the column count of every included UC group
    for (IntSet::const_iterator it = ucPars.begin();
         it != ucPars.end();
         ++it)
    {
        total += ucInfo.ucSizes.at(*it - 1);
    }

    // and of every included fixed group
    for (IntSet::const_iterator it = fixPars.begin();
         it != fixPars.end();
         ++it)
    {
        total += fixInfo.fixSizes.at(*it - 1);
    }

    return total;
}
|
| 495 | ||
| 496 |
// ***************************************************************************************************// |
|
| 497 | ||
| 498 |
// convert glm model into list for export to R |
|
| 499 |
// Convert a GLM model (configuration + information) into an R list for
// export, delegating to the respective convert2list methods.
List
Model::convert2list(const FpInfo& currFp,
                    long double logNormConst,
                    const Book& bookkeep) const
{
    return List::create(_["configuration"] = par.convert2list(currFp),
                        _["information"] = info.convert2list(logNormConst, bookkeep));
}
|
| 507 | ||
| 508 |
// ***************************************************************************************************// |
|
| 509 | ||
| 510 |
// compute nodes and log weights for given mode and var of target unnormalized density |
|
| 511 |
void |
|
| 512 | 21840x |
GaussHermite::getNodesAndLogWeights(double mode, double var, |
| 513 |
MyDoubleVector& nodes, MyDoubleVector& logWeights) const // output |
|
| 514 |
{
|
|
| 515 |
// logarithm of square root of (2 * var). |
|
| 516 | 21840x |
double logSqrt2Var = 0.5 * (M_LN2 + log(var)); |
| 517 | ||
| 518 | 21840x |
MyDoubleVector::const_iterator t = tVec.begin(); |
| 519 | 21840x |
for(MyDoubleVector::const_iterator |
| 520 | 21840x |
w = wVec.begin(); |
| 521 | 458640x |
w != wVec.end(); |
| 522 | 436800x |
++w, ++t) |
| 523 |
{
|
|
| 524 | 436800x |
nodes.push_back(mode + exp(logSqrt2Var) * (*t)); |
| 525 | 436800x |
logWeights.push_back(log(*w) + (*t) * (*t) + logSqrt2Var); |
| 526 |
} |
|
| 527 |
} |
|
| 528 | ||
| 529 |
// ***************************************************************************************************// |
|
| 530 | ||
| 531 |
// GlmModelConfig // |
|
| 532 | ||
| 533 |
// constructor |
|
| 534 | 912x |
// GlmModelConfig constructor: assemble the model-family information that is
// constant across all models from the R-side objects.
//
// rcpp_family:         R family list (dispersions, weights, linPredStart,
//                      offsets, family, link, phi)
// nullModelLogMargLik: log marginal likelihood of the null model
// nullModelDeviance:   deviance of the null model
// fixedg:              fixed g value (when a fixed g prior is used)
// rcpp_gPrior:         S4 object describing the g-prior
// responses:           response vector
// debug:               print debug output?
// useFixedc:           derive factor c at linear predictor 0 instead of at
//                      the empirical mean
// empiricalMean:       empirical mean of the responses
// empiricalgPrior:     use the empirical g-prior (then c is set to 1)
//
// NOTE(review): distribution, link and gPrior are raw `new` allocations with
// no delete visible here — presumably released in the destructor; confirm
// to rule out a leak.
GlmModelConfig::GlmModelConfig(List& rcpp_family,
                               double nullModelLogMargLik,
                               double nullModelDeviance,
                               double fixedg,
                               S4& rcpp_gPrior,
                               const AVector& responses,
                               bool debug,
                               bool useFixedc,
                               double empiricalMean,
                               bool empiricalgPrior) :
    dispersions(as<NumericVector>(rcpp_family["dispersions"])),
    weights(as<NumericVector>(rcpp_family["weights"])),
    linPredStart(as<NumericVector>(rcpp_family["linPredStart"])),
    offsets(as<NumericVector>(rcpp_family["offsets"])),
    nullModelLogMargLik(nullModelLogMargLik),
    nullModelDeviance(nullModelDeviance),
    fixedg(fixedg),
    familyString(as<std::string>(rcpp_family["family"])),
    linkString(as<std::string>(rcpp_family["link"])),
    canonicalLink((familyString == "binomial" && linkString == "logit") ||
                  (familyString == "poisson" && linkString == "log")),
    empiricalgPrior(empiricalgPrior)
{
    // and the phi from the R family object
    const double phi = rcpp_family["phi"];

    // select the response distribution by family name
    if (familyString == "binomial")
    {
        distribution = new Binomial(responses,
                                    weights);
    }
    else if (familyString == "gaussian")
    {
        distribution = new Gaussian(responses,
                                    weights,
                                    phi);
    }
    else if (familyString == "poisson")
    {
        distribution = new Poisson(responses,
                                   weights);
    }
    else
    {
        Rf_error("Distribution not implemented");
    }

    // select the link function by link name
    if (linkString == "logit")
    {
        link = new LogitLink();
    }
    else if (linkString == "probit")
    {
        link = new ProbitLink();
    }
    else if (linkString == "cloglog")
    {
        link = new CloglogLink();
    }
    else if (linkString == "inverse")
    {
        link = new InverseLink();
    }
    else if (linkString == "log")
    {
        link = new LogLink();
    }
    else if (linkString == "identity")
    {
        link = new IdentityLink();
    }
    else
    {
        Rf_error("Link not implemented!");
    }


    if (useFixedc) {

        // from the link and the distribution we can derive the constant factor
        // c = v(h(0)) / h'(0)^2
        double deriv = link->mu_eta(0);
        cfactor = distribution->variance(link->linkinv(0)) / (deriv * deriv);
    }
    // NOTE(review): this condition is the exact complement of the branch
    // above, so it is effectively a plain `else`.
    else if (!useFixedc) {
        //Rprintf("used y mean of %f\n", empiricalMean);

        // same factor c, but evaluated at the empirical mean of the response
        double deriv = link->mu_eta(link->linkfun(empiricalMean));
        cfactor = distribution->variance(link->linkinv(link->linkfun(empiricalMean))) / (deriv * deriv);

    }

    if(debug)
    {
        Rprintf("Factor c is %f\n", cfactor);
    }

    // ensure that this is positive
    if(! (cfactor > 0.0))
    {
        Rf_error("cfactor equal to %f, so not positive", cfactor);
    }

    // we don't need c if we use the empirical prior
    if(empiricalgPrior) cfactor = 1;

    // finally the g-prior stuff:

    // first get the class name of the S4 g-prior object
    std::string gPriorString = rcpp_gPrior.attr("class");

    // and then depending on the name, initialize our gPrior in C++.
    if (gPriorString == "HypergPrior")
    {
        gPrior = new HypergPrior(as<double>(rcpp_gPrior.slot("a")));
    }
    else if (gPriorString == "InvGammaGPrior")
    {
        gPrior = new InvGammaGPrior(as<double>(rcpp_gPrior.slot("a")),
                                    as<double>(rcpp_gPrior.slot("b")));
    }
    else if (gPriorString == "IncInvGammaGPrior")
    {
        gPrior = new IncInvGammaGPrior(as<double>(rcpp_gPrior.slot("a")),
                                       as<double>(rcpp_gPrior.slot("b")));
    }
    else if (gPriorString == "CustomGPrior")
    {
        gPrior = new CustomGPrior(as<SEXP>(rcpp_gPrior.slot("logDens")));
    }
    else
    {
        Rf_error("g-prior not implemented!");
    }
}
|
| 668 | ||
| 669 | ||
| 670 |
// ***************************************************************************************************// |
|
| 671 | ||
| 672 | ||
| 673 | ||
| 674 |
// dataValues // |
|
| 675 | ||
| 676 |
// 03/12/2012: add censoring indicator vector |
|
| 677 | ||
| 678 | 912x |
// DataValues constructor: store the (raw and centered) design matrices, the
// response, the censoring indicator (added 03/12/2012) and the fixed column
// indices, and precompute the number of observations, a ones vector and the
// total sum of squares of the centered response (SST).
DataValues::DataValues(const AMatrix &x,
                       const AMatrix &xcentered,
                       const AVector &y,
                       const IntVector &censInd,
                       const double &totalNum,
                       const IntSet& fixedCols) :
    design(x),
    centeredDesign(xcentered),
    response(y),
    censInd(censInd),
    nObs(design.n_rows),
    onesVector(arma::ones<AVector>(nObs)),
    totalNumber(static_cast<PosLargeInt> (totalNum)),
    fixedCols(fixedCols)
{
    // and the SST: sum of squared deviations of the response from its mean
    AVector centeredResponse = response - arma::mean(response);
    sumOfSquaresTotal = arma::dot(centeredResponse, centeredResponse);
}
|
| 697 | ||
| 698 | ||
| 699 | ||
| 700 |
// ***************************************************************************************************// |
|
| 701 | ||
| 702 | ||
| 703 |
// ModelCache // |
|
| 704 | ||
| 705 |
// insert model parameter and corresponding info into cache, |
|
| 706 |
// with caring about the maximum number of elements in the map. |
|
| 707 |
bool |
|
| 708 | 24288x |
ModelCache::insert(const ModelPar& par, const GlmModelInfo& info) |
| 709 |
{
|
|
| 710 |
// first check size of cache |
|
| 711 | 24288x |
if(isFull()) |
| 712 |
{
|
|
| 713 |
// if we are full, then check if this log posterior is better than |
|
| 714 |
// the worst cached model, which is pointed to by |
|
| 715 | ! |
MapType::iterator worstModelIter = *(modelIterSet.begin()); |
| 716 | ||
| 717 |
// the comparison |
|
| 718 | ! |
if((worstModelIter->second) < info) |
| 719 |
{
|
|
| 720 |
// new model is better than worst model cached. |
|
| 721 |
// so we delete the worst model from the cache. |
|
| 722 | ||
| 723 |
// first from the map |
|
| 724 | ! |
modelMap.erase(worstModelIter); |
| 725 |
// and then from the set |
|
| 726 | ! |
modelIterSet.erase(modelIterSet.begin()); |
| 727 |
} |
|
| 728 |
else |
|
| 729 |
{
|
|
| 730 |
// the new model is not better than the worst model cached, |
|
| 731 |
// so we do not cache it. |
|
| 732 | ! |
return false; |
| 733 |
} |
|
| 734 |
} |
|
| 735 | ||
| 736 |
// so now we know that we want to insert the model into the cache, |
|
| 737 |
// either because the cache was not full or because the new model was better |
|
| 738 |
// than the worst model cached. |
|
| 739 | ||
| 740 |
// -> try inserting into the map: |
|
| 741 | 24288x |
std::pair<MapType::iterator, bool> ret = modelMap.insert(MapType::value_type(par, info)); |
| 742 | ||
| 743 |
// if we were successful: |
|
| 744 | 24288x |
if(ret.second) |
| 745 |
{
|
|
| 746 |
// then also insert the iterator pointing to the map element into the set. |
|
| 747 | 24288x |
modelIterSet.insert(ret.first); |
| 748 | ||
| 749 |
// return success |
|
| 750 | 24288x |
return true; |
| 751 |
} |
|
| 752 |
else |
|
| 753 |
{
|
|
| 754 | ! |
return false; |
| 755 |
Rf_error("Should not happen: model already contained in model cache!");
|
|
| 756 |
} |
|
| 757 |
} |
|
| 758 | ||
| 759 |
// search for the log marginal likelihood of a model config in the map, |
|
| 760 |
// and return NA if not found |
|
| 761 |
// Look up the info of a model configuration in the cache. When the model is
// not cached, an info object filled with NA (and zero z-mode/var/Laplace
// values) is returned instead.
GlmModelInfo
ModelCache::getModelInfo(const ModelPar& par) const
{
    // try to locate the configuration in the map
    MapType::const_iterator hit = modelMap.find(par);

    if (hit == modelMap.end())
    {
        // not cached: signal via NA entries
        return GlmModelInfo(R_NaReal, R_NaReal, Cache(), 0.0, 0.0, 0.0, R_NaReal);
    }

    return hit->second;
}
|
| 773 | ||
| 774 |
// increment the sampling frequency for a model configuration |
|
| 775 |
// (of course, if this config is not cached nothing is done) |
|
| 776 |
void |
|
| 777 | 802560x |
ModelCache::incrementFrequency(const ModelPar& par) |
| 778 |
{
|
|
| 779 |
// search for the config in the map |
|
| 780 | 802560x |
MapType::iterator ret = modelMap.find(par); |
| 781 | ||
| 782 |
// if found, increment the hits |
|
| 783 | 802560x |
if(ret != modelMap.end()) |
| 784 | 802560x |
ret->second.hits++; |
| 785 |
} |
|
| 786 | ||
| 787 |
// compute the log normalising constant from all cached models |
|
| 788 |
// Compute the log normalising constant of the model posterior from all
// cached models: log( sum over cached models of exp(logPost) ), using the
// numerically safe SafeSum::logSumExp accumulator.
long double
ModelCache::getLogNormConstant() const
{
    // use safe summation
    SafeSum vec;

    // traverse the cache
    for(MapType::const_iterator
            m = modelMap.begin();
        m != modelMap.end();
        ++m)
    {
        // and add all unnormalized log posteriors
        vec.add(m->second.logPost);
    }

    // return the log of the sum of the exp'ed saved elements
    return vec.logSumExp();
}
|
| 807 | ||
| 808 |
// compute the inclusion probabilities from all cached models, |
|
| 809 |
// taking the log normalising constant, the number of FPs and the number of UC groups |
|
| 810 |
// Compute the covariate inclusion probabilities from all cached models.
//
// logNormConstant: log normalising constant of the model posterior
// nFps:            number of FP covariates
// nUcs:            number of UC groups
//
// Returns a vector of length nFps + nUcs: first the FP inclusion
// probabilities (in fpnames order), then the UC group inclusion
// probabilities (in group-index order). Each entry is the safe sum of the
// normalised posterior probabilities of all cached models containing that
// covariate/group.
MyDoubleVector
ModelCache::getInclusionProbs(long double logNormConstant, PosInt nFps, PosInt nUcs) const
{
    // abbreviation
    typedef std::vector<SafeSum> SafeSumVector;
    // allocate vector of safeSum objects for all FPs
    SafeSumVector fps(nFps);

    // and all UC groups
    SafeSumVector ucs(nUcs);

    // now process each model in the cache
    for(MapType::const_iterator
            m = modelMap.begin();
        m != modelMap.end();
        ++m)
    {
        // abbrevs
        const ModelPar& thisPar = m->first;
        const GlmModelInfo& thisInfo = m->second;

        // first process the FPs
        {
            // fps and thisPar.fpPars are parallel, so advance both together
            SafeSumVector::iterator s = fps.begin();
            for (PowersVector::const_iterator
                    p = thisPar.fpPars.begin();
                 p != thisPar.fpPars.end();
                 ++p, ++s)
            {
                // is this FP in the model m?
                if (! p->empty())
                {
                    // then add the normalized model probability onto his FP stack
                    s->add(exp(thisInfo.logPost - logNormConstant));
                }
            }
        }

        // then process the UC groups
        {
            // ucs is indexed by group - 1, so advance s with the 1-based i
            SafeSumVector::iterator s = ucs.begin();
            for (PosInt i = 1; i <= nUcs; ++i, ++s)
            {
                // is this UC group in the model m?
                if (thisPar.ucPars.find(i) != thisPar.ucPars.end())
                {
                    // then add the normalized model probability onto his UC stack
                    s->add(exp(thisInfo.logPost - logNormConstant));
                }
            }
        }
    } // end processing all models in the cache

    // so now we can sum up safesum-wise to the return double vector
    MyDoubleVector ret;

    for(SafeSumVector::iterator
            s = fps.begin();
        s != fps.end();
        ++s)
    {
        ret.push_back(s->sum());
    }

    for(SafeSumVector::iterator
            s = ucs.begin();
        s != ucs.end();
        ++s)
    {
        ret.push_back(s->sum());
    }

    return ret;
}
|
| 884 | ||
| 885 |
// convert the best nModels from the cache into an R list |
|
| 886 |
// Convert the best min(bookkeep.nModels, cache size) models from the cache
// into an R list, ordered from best to worst posterior. modelIterSet is
// ordered increasingly by posterior, so it is traversed in reverse.
List
ModelCache::getListOfBestModels(const FpInfo& fpInfo,
                                long double logNormConst,
                                const Book& bookkeep) const
{
    // allocate the return list
    List ret(std::min(bookkeep.nModels,
                      static_cast<PosInt>(modelIterSet.size())));
    // cast is necessary for gcc-4.2 on Mac on R-forge.

    // process the ordered list of best models from the end (because the set is ordered increasingly)
    PosInt i = 0;
    for(SetType::const_reverse_iterator
            s = modelIterSet.rbegin();
        (i < bookkeep.nModels) && (s != modelIterSet.rend()); // so the return list has min(nModels, modelIterSet.size()) elements.
        ++s, ++i)
    {
        // allocate two-element list in the i-th slot of the return list
        ret[i] = List::create(_["configuration"] = (**s).first.convert2list(fpInfo),
                              _["information"] = (**s).second.convert2list(logNormConst,
                                                                           bookkeep));
    }

    return ret;
}
|
| 911 | ||
| 912 | ||
| 913 |
// End of file. |
|
| 914 |
| 1 |
#ifndef DATASTRUCTURE_H_ |
|
| 2 |
#define DATASTRUCTURE_H_ |
|
| 3 | ||
| 4 |
#include <set> |
|
| 5 |
#include <map> |
|
| 6 |
#include <vector> |
|
| 7 |
#include <iterator> |
|
| 8 |
#include <numeric> |
|
| 9 |
#include <string> |
|
| 10 | ||
| 11 |
#include "rcppExport.h" |
|
| 12 |
#include "types.h" |
|
| 13 |
#include "functionWraps.h" |
|
| 14 |
#include "fpUcHandling.h" |
|
| 15 |
#include "links.h" |
|
| 16 |
#include "distributions.h" |
|
| 17 |
#include "gpriors.h" |
|
| 18 | ||
| 19 | ||
| 20 |
// ***************************************************************************************************// |
|
| 21 | ||
| 22 |
// Numerically careful accumulator: values are stored first and only summed
// on demand with an accurate algorithm.
struct SafeSum
{
    // the collected summands
    LongDoubleVector vals;

    // append one value to the collection
    void
    add(const long double &val);

    // compute the sum of the elements using accurate algorithm
    long double
    sum();

    // compute the log of the sum of the exp of the elements using accurate algorithm,
    // and avoiding infinite contributions.
    long double
    logSumExp();

    // compute the sum of the elements using very simple algorithm
    long double
    simpleSum();
};
|
| 42 | ||
| 43 |
// ***************************************************************************************************// |
|
| 44 | ||
| 45 |
// Collection of model indices used to sum normalised posterior
// probabilities over a subset of models (e.g. all models containing a
// particular covariate).
struct IndexSafeSum
{
    // the model index collection
    IndSet indices;

    // add a model index i to the collection indices
    void
    add(const Ind& i);

    // taking the safe sum object s of log posteriors, and the associated log normalizing
    // constant logNormConst,
    // compute the sum \sum_{i in indices} exp{s_i - logNormConst}.
    long double
    sumNormalizedExp(const SafeSum& s, long double logNormConst) const;
};
|
| 60 | ||
| 61 |
// ***************************************************************************************************// |
|
| 62 | ||
| 63 |
// 21/11/2012: add tbf option |
|
| 64 | ||
| 65 |
// bookkeeping, shall later be used by glm and hyper-g! |
|
| 66 |
// Bookkeeping for the model search / MCMC; shall later be used by glm and
// hyper-g. (21/11/2012: added tbf option.)
struct Book
{
    // running counters
    PosLargeInt modelCounter;   // number of models visited so far
    PosLargeInt chainlength;    // MCMC chain length (0 for exhaustive search)
    PosLargeInt nanCounter;     // number of models with NaN results

    // safe accumulator of model log posteriors
    SafeSum modelLogPosteriors;
    //IndexSafeSum *covGroupWisePosteriors; // for computation of covariate inclusion probs: array (bfp, uc)
    std::vector<IndexSafeSum> covGroupWisePosteriors; // for computation of covariate inclusion probs: array (bfp, uc)

    // option flags (fixed at construction)
    const bool tbf;                  // use test-based Bayes factors?
    const bool doGlm;                // GLM (vs. normal) model?
    const bool empiricalBayes;       // empirical Bayes estimation of g?
    const bool useFixedg;            // use a fixed g?
    const bool useFixedc;            // derive factor c at linear predictor 0?
    const bool doSampling;           // MCMC sampling (vs. exhaustive search)?
    const bool verbose;              // print progress messages?
    const std::string modelPrior;    // name of the model prior

    const PosInt nModels;            // number of best models to return
    const PosInt nCache;             // maximum number of cached models

    const double largeVariance;      // threshold for flagging large z variance
    const bool useBfgs;              // use BFGS optimization?
    const bool debug;                // print debug output?

    const bool higherOrderCorrection; // apply higher-order Laplace correction?

    // constructor which checks the chainlength
    Book(bool tbf,
         bool doGlm,
         bool empiricalBayes,
         bool useFixedg,
         bool useFixedc,
         double cl,
         bool doSampling,
         bool verbose,
         std::string modelPrior,
         PosInt nModels,
         PosInt nCache,
         double largeVariance,
         bool useBfgs,
         bool debug,
         bool higherOrderCorrection);

};
|
| 112 | ||
| 113 |
// ***************************************************************************************************// |
|
| 114 | ||
| 115 |
// data structure: model information for general models. |
|
| 116 |
// keep in mind that contents must be assignable. |
|
| 117 |
// Model information for general models.
// Keep in mind that contents must be assignable.
struct ModelInfo
{
    // most important:
    double logMargLik;   // log marginal likelihood
    double logPrior;     // log model prior probability

    // just the sum of logMargLik and logPrior.
    // But we need it often, so save it here (the ctr computes it).
    double logPost;

    // only needed for MCMC: number of times the sampler visited this model
    PosLargeInt hits;

    // simple constructor
    ModelInfo(double logMargLik, double logPrior, PosLargeInt hits = 0) :
        logMargLik(logMargLik), logPrior(logPrior), logPost(logMargLik + logPrior), hits(hits)
    {
    }

    // compare two model infos by their posterior probability:
    // the model info with the lower one is lower.
    bool
    operator<(const ModelInfo& m) const
    {
        return logPost < m.logPost;
    }
};
|
| 144 | ||
| 145 |
// ***************************************************************************************************// |
|
| 146 | ||
| 147 |
//// specialization for normal model with hyper-g prior |
|
| 148 |
//struct NormalModelInfo : public ModelInfo |
|
| 149 |
//{
|
|
| 150 |
// // additional to ModelInfo: |
|
| 151 |
// |
|
| 152 |
// // posterior expected factor g given this model |
|
| 153 |
// double postExpectedg; |
|
| 154 |
// |
|
| 155 |
// // posterior expected shrinkage factor g/(1+g) given this model |
|
| 156 |
// double postExpectedShrinkage; |
|
| 157 |
// |
|
| 158 |
// // coefficient of determination for this model |
|
| 159 |
// double R2; |
|
| 160 |
// |
|
| 161 |
// // constructor |
|
| 162 |
// NormalModelInfo(double logMargLik, |
|
| 163 |
// double logPrior, |
|
| 164 |
// double postExpectedg, |
|
| 165 |
// double postExpectedShrinkage, |
|
| 166 |
// double R2, |
|
| 167 |
// PosLargeInt hits) : |
|
| 168 |
// ModelInfo(logMargLik, logPrior, hits), |
|
| 169 |
// postExpectedg(postExpectedg), |
|
| 170 |
// postExpectedShrinkage(postExpectedShrinkage), |
|
| 171 |
// R2(R2) |
|
| 172 |
// {
|
|
| 173 |
// } |
|
| 174 |
//}; |
|
| 175 | ||
| 176 |
// ***************************************************************************************************// |
|
| 177 | ||
| 178 |
// specialization for GLM model: adds the z-density cache, the moment
// estimates of the z posterior and TBF-specific results to ModelInfo.
struct GlmModelInfo : public ModelInfo
{
    // full constructor, used when the model has just been estimated
    GlmModelInfo(double logMargLik,
                 double logPrior,
                 const Cache& cache,
                 double zMode,
                 double zVar,
                 double laplaceApprox,
                 double residualDeviance) :
        ModelInfo(logMargLik,
                  logPrior),
        negLogUnnormZDensities(cache),
        zMode(zMode),
        zVar(zVar),
        laplaceApprox(laplaceApprox),
        residualDeviance(residualDeviance)
    {
    }

    // cache of negative log unnormalized posterior density of z evaluations:
    // the z values
    Cache negLogUnnormZDensities;

    // moment estimates:

    // mode of the z posterior (z*)
    double zMode;

    // and associated estimated variance (sigma*^2)
    double zVar;

    // here is the resulting Laplace approximation of the log marginal likelihood
    double laplaceApprox;

    // this is only filled in the TBF case
    double residualDeviance;

    // convert to R list (defined elsewhere; needs the overall
    // normalizing constant and the bookkeeping options)
    Rcpp::List
    convert2list(long double logNormConst,
                 const Book& bookkeep) const;

    // conversion of R list to modelInfo:
    // Cache and hits are not filled!
    // (the cache is default-constructed and hits is set to NA_INTEGER)
    explicit
    GlmModelInfo(Rcpp::List rcpp_information) :
        ModelInfo(rcpp_information["logMargLik"],
                  rcpp_information["logPrior"],
                  NA_INTEGER),
        zMode(rcpp_information["zMode"]),
        zVar(rcpp_information["zVar"]),
        laplaceApprox(rcpp_information["laplaceApprox"]),
        residualDeviance(rcpp_information["residualDeviance"])
    {
    }
};
|
| 236 | ||
| 237 |
// ***************************************************************************************************// |
|
| 238 | ||
| 239 |
// model parameter object: must have a strict weak ordering
// (it is used as the key type of the ModelCache map below)
struct ModelPar
{
    PowersVector fpPars; // vector of multisets (one multiset of powers per FP term)

    // not needed: just use fpPars.size()
    // PosInt nFps; // length of vector

    PosInt fpSize; // number of fp powers
    IntSet ucPars; // set of group indices, starting from 1 (!)
    IntSet fixPars; // set of group indices, starting from 1 (!)

    // not needed: just use ucPars.size()
    // PosInt ucSize; // number of uc Groups included

    // start with an empty (null) model:
    // nFps empty power multisets, no UC and no fixed groups
    ModelPar(PosInt nFps) :
        fpPars(nFps), fpSize(0), ucPars(), fixPars()
    {
    }

    // comparison of configurations (strict weak ordering, defined elsewhere)
    bool
    operator<(const ModelPar& m) const;

    // return a textual description of this model configuration
    std::string
    print(const FpInfo& fpInfo) const;

    // return the size of the model (excluding the intercept)
    PosInt
    size(const UcInfo& ucInfo, const FixInfo& fixInfo) const;

    // convert to R list
    Rcpp::List
    convert2list(const FpInfo& currFp) const;

    // convert R list to ModelPar (inverse of convert2list)
    ModelPar(Rcpp::List rcpp_configuration,
             const FpInfo& fpInfo);

    // compute set of free uc group indices in a model configuration,
    // i.e. groups that could still be added without exceeding maxDim
    IntSet
    getFreeUcs(const PosIntVector& ucSizes,
               const PosInt& currDim,
               const PosInt& maxDim) const;

    // compute set of free cov indices in a model configuration
    PosIntSet
    getFreeCovs(const FpInfo& currFp,
                const IntSet& freeUcs,
                const PosInt& currDim,
                const PosInt& maxDim) const;

    // determine set of present cov indices
    PosIntSet
    getPresentCovs() const;

    // push back index into covGroupWisePosteriors-Array
    void
    pushInclusionProbs(const FpInfo& fpInfo,
                       const UcInfo& ucInfo,
                       Book& bookkeep) const;
};
|
| 303 | ||
| 304 |
// ***************************************************************************************************// |
|
| 305 | ||
| 306 | ||
| 307 |
// first only for Glms. Possibly extended with template class for ModelInfo to NormalModelInfo.
// Bundles a model configuration with its estimation results.
struct Model
{
    ModelPar par;      // the configuration
    GlmModelInfo info; // the estimation results for this configuration

    // initialize
    Model(const ModelPar& p, const GlmModelInfo& i) :
        par(p), info(i)
    {
    }

    // fully forward the comparison to the model parameter and info class.
    // we need to include the par comparison because sometimes
    // really the info is identical...
    // (the par tie-break keeps this a strict weak ordering)
    bool
    operator<(const Model& m) const // less
    {
        if(info < m.info)
            return true;
        else if(m.info < info)
            return false;
        else
            return par < m.par;
    }

    // model to list (defined elsewhere)
    Rcpp::List
    convert2list(const FpInfo& currFp,
                 long double logNormConst,
                 const Book& bookkeep) const;
};
|
| 339 | ||
| 340 | ||
| 341 |
// ***************************************************************************************************// |
|
| 342 | ||
| 343 | ||
| 344 |
// Gauss-Hermite quadrature helper: stores the raw quadrature nodes and
// weights (passed in from R) and adapts them to a given target density.
class GaussHermite {
public:
    // compute nodes and log weights for given mode and sd of target unnormalized density
    void
    getNodesAndLogWeights(double mode, double var,
                          MyDoubleVector& nodes, MyDoubleVector& logWeights) const; // output

    // constructor: pulls "nodes" and "weights" out of the R list
    GaussHermite(Rcpp::List rcpp_gaussHermite) :
        tVec(Rcpp::as<MyDoubleVector>(rcpp_gaussHermite["nodes"])),
        wVec(Rcpp::as<MyDoubleVector>(rcpp_gaussHermite["weights"]))
    {
    }

private:
    const MyDoubleVector tVec; // nodes
    const MyDoubleVector wVec; // weights (not log!)
};
|
| 362 | ||
| 363 | ||
| 364 |
// ***************************************************************************************************// |
|
| 365 | ||
| 366 | ||
| 367 | ||
| 368 |
// Configuration shared by all models of one GLM analysis:
// family/link/prior objects plus null-model reference quantities.
struct GlmModelConfig
{
    // vector of dispersions (phi / weights)
    const AVector dispersions;

    // vector of weights
    const AVector weights;

    // vector of starting values for the linear predictor
    // constant, because for each model we will initially start from that.
    const AVector linPredStart;

    // the vector of offsets
    const AVector offsets;

    // log marg lik of the the null model
    const double nullModelLogMargLik;

    // deviance of the the null model
    const double nullModelDeviance;

    // fixed value of g
    const double fixedg;

    // the g-prior information
    // NOTE(review): gPrior, link and distribution are raw owning pointers
    // deleted in the dtor, but no copy ctor / assignment operator is
    // declared (rule of three). Copying a GlmModelConfig would double-delete
    // — confirm this type is never copied, or disable copying.
    const GPrior* gPrior;

    // the link information
    const Link* link;

    // the distribution information
    const Distribution* distribution;

    // the constant factor deriving from the link and the distribution,
    // which we need for the generalized g-prior
    double cfactor;

    // save also the names of link and distribution
    const std::string familyString;
    const std::string linkString;

    // does this model use the canonical link?
    const bool canonicalLink;

    // should we use the empirical g prior which uses
    // the information matrix ( J(B)^-1 ) for the covariance instead of (X'X)^-1
    const bool empiricalgPrior;

    // constructor (defined elsewhere; allocates gPrior, link, distribution)
    GlmModelConfig(Rcpp::List& rcpp_family,
                   double nullModelLogMargLik,
                   double nullModelDeviance,
                   double fixedg,
                   Rcpp::S4& rcpp_gPrior,
                   const AVector& responses,
                   bool debug,
                   bool useFixedc,
                   double empiricalMean,
                   bool empiricalgPrior);

    // destructor: releases the objects allocated by the constructor
    ~GlmModelConfig()
    {
        delete gPrior;
        delete link;
        delete distribution;
    }
};
|
| 436 | ||
| 437 |
// ***************************************************************************************************// |
|
| 438 | ||
| 439 | ||
| 440 |
// Container for the data entering the model search:
// design matrices, response and model-space bookkeeping.
struct DataValues
{
    AMatrix design;         // the raw design matrix
    AMatrix centeredDesign; // the column-centered design matrix

    AVector response;
    double sumOfSquaresTotal;

    // censoring indicator (presumably for survival-type responses —
    // TODO confirm against the constructor definition)
    IntVector censInd;

    int nObs; // number of observations

    AVector onesVector; // vector of ones, length nObs

    PosLargeInt totalNumber; // cardinality of model space

    const IntSet fixedCols; // column indices that are always included

    // constructor (defined elsewhere)
    DataValues(const AMatrix &x,
               const AMatrix &xcentered,
               const AVector &y,
               const IntVector &censInd,
               const double &totalNum,
               const IntSet& fixedCols);
};
|
| 465 | ||
| 466 | ||
| 467 |
// ***************************************************************************************************// |
|
| 468 | ||
| 469 |
// first only for GLM models: |
|
| 470 |
// the model cache class. |
|
| 471 | ||
| 472 |
// Caches the best models in a map of a given maximum size, and also stores the |
|
| 473 |
// (unnormalized) log posterior probabilities in an ordered set, pointing to the models in the map. |
|
| 474 |
class ModelCache {
|
|
| 475 |
public: |
|
| 476 | ||
| 477 | ||
| 478 |
// create a new ModelCache with given maximum size. |
|
| 479 | 112x |
ModelCache(int maxSize) : |
| 480 | 112x |
maxSize(maxSize), |
| 481 | 112x |
modelMap(), |
| 482 | 112x |
modelIterSet() |
| 483 |
{
|
|
| 484 |
} |
|
| 485 | ||
| 486 |
// check if max size was reached |
|
| 487 |
bool |
|
| 488 | 4554x |
isFull() const |
| 489 |
{
|
|
| 490 | 4554x |
return modelMap.size() == maxSize; |
| 491 |
} |
|
| 492 | ||
| 493 |
// return size of cache |
|
| 494 |
int |
|
| 495 | 144x |
size() const |
| 496 |
{
|
|
| 497 | 144x |
return modelMap.size(); |
| 498 |
} |
|
| 499 | ||
| 500 |
// insert model parameter and belonging model info into the cache. |
|
| 501 |
// returns false if not inserted (e.g. because the par was |
|
| 502 |
// already inside, or the model was not good enough) |
|
| 503 |
bool |
|
| 504 |
insert(const ModelPar& par, const GlmModelInfo& info); |
|
| 505 | ||
| 506 |
// search for the model info of a model config in the map, |
|
| 507 |
// and return an information with NA for log marg lik if not found |
|
| 508 |
GlmModelInfo |
|
| 509 |
getModelInfo(const ModelPar& par) const; |
|
| 510 | ||
| 511 |
// increment the sampling frequency for a model configuration |
|
| 512 |
// (of course, if this config is not cached nothing is done!) |
|
| 513 |
void |
|
| 514 |
incrementFrequency(const ModelPar& par); |
|
| 515 | ||
| 516 |
// compute the log normalising constant from all cached models |
|
| 517 |
long double |
|
| 518 |
getLogNormConstant() const; |
|
| 519 | ||
| 520 |
// compute the inclusion probabilities from all cached models, |
|
| 521 |
// taking the log normalising constant and the total number of FPs / UC groups |
|
| 522 |
MyDoubleVector |
|
| 523 |
getInclusionProbs(long double logNormConstant, PosInt nFps, PosInt nUcs) const; |
|
| 524 | ||
| 525 |
// convert the best nModels from the cache into an R list |
|
| 526 |
Rcpp::List |
|
| 527 |
getListOfBestModels(const FpInfo& fpInfo, |
|
| 528 |
long double logNormConst, |
|
| 529 |
const Book& bookkeep) const; |
|
| 530 | ||
| 531 | ||
| 532 |
private: |
|
| 533 |
// the map type |
|
| 534 |
typedef std::map<ModelPar, GlmModelInfo> MapType; |
|
| 535 | ||
| 536 |
// define comparison function for iterators |
|
| 537 |
struct Compare_map_iterators |
|
| 538 |
{
|
|
| 539 |
bool |
|
| 540 | 38250x |
operator()(const MapType::iterator& first, const MapType::iterator& second) const |
| 541 |
{
|
|
| 542 | 38250x |
return (first->second.logPost) < (second->second.logPost); |
| 543 |
} |
|
| 544 |
}; |
|
| 545 | ||
| 546 |
// the set type of ordered map iterators |
|
| 547 |
typedef std::set<MapType::iterator, Compare_map_iterators> SetType; |
|
| 548 | ||
| 549 |
// and finally the data members |
|
| 550 |
const MapType::size_type maxSize; |
|
| 551 |
MapType modelMap; |
|
| 552 |
SetType modelIterSet; |
|
| 553 |
}; |
|
| 554 | ||
| 555 |
// ***************************************************************************************************// |
|
| 556 | ||
| 557 |
// all information needed in mcmc function
struct ModelMcmc
{
    // initialize with the null model only.
    // (logPrior starts as NaN; presumably it is computed on the first
    // iteration — TODO confirm against the mcmc function)
    ModelMcmc(const FpInfo& fpInfo,
              const UcInfo& ucInfo,
              PosInt maxDim,
              double logMargLikNullModel) :
        modPar(fpInfo.nFps),
        dim(1),
        freeUcs(modPar.getFreeUcs(ucInfo.ucSizes, dim, maxDim)),
        freeCovs(modPar.getFreeCovs(fpInfo, freeUcs, dim, maxDim)),
        presentCovs(modPar.getPresentCovs()),
        birthprob(1.0),
        deathprob(0.0),
        moveprob(0.0),
        logMargLik(logMargLikNullModel),
        logPrior(R_NaN)
    {
    }

    ModelPar modPar;
    PosInt dim; // number of columns in this model's design matrix

    IntSet freeUcs; // indices within uc groups, denoting the birthable ones
    PosIntSet freeCovs; // indices of free covs (starting from first fp with index 1 up to uc index = nFps + 1)
    PosIntSet presentCovs; // analogue

    double birthprob, deathprob, moveprob; // move type probabilities, switchprob is 1-bprob-dprob-mprob.
    double logMargLik;
    double logPrior;
};
|
| 589 | ||
| 590 |
// ***************************************************************************************************// |
|
| 591 | ||
| 592 |
#endif /*DATASTRUCTURE_H_*/ |
| 1 |
/* |
|
| 2 |
* distributions.h |
|
| 3 |
* |
|
| 4 |
* Created on: 15.03.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* Classes for exponential family distributions. |
|
| 8 |
* |
|
| 9 |
*/ |
|
| 10 | ||
| 11 |
#ifndef DISTRIBUTIONS_H_ |
|
| 12 |
#define DISTRIBUTIONS_H_ |
|
| 13 | ||
| 14 |
#include "types.h" |
|
| 15 |
#include "rcppExport.h" |
|
| 16 | ||
| 17 | ||
| 18 |
// ***************************************************************************************************// |
|
| 19 | ||
| 20 |
// General virtual class for an exponential family distribution, including
// the variance function of the modelled mean mu
// and the loglikelihood of a whole vector of modelled means.
// We also always save the vector of scalar responses and weights, because naturally
// the loglikelihood of "means" depends on this!
class Distribution
{
public:
    // ctr: copies the response and weight vectors into the object
    Distribution(const AVector& responses,
                 const AVector& weights) :
        responses(responses),
        weights(weights)
    {
    }

    // variance function V(mu) of the exponential family
    virtual double
    variance(double mu) const = 0;

    // loglikelihood of the stored responses given the fitted means
    // ("means" points to an array of length == length(responses))
    virtual double
    loglik(const double *means) const = 0;

    // we need a virtual destructor here,
    // cf. Accelerated C++ pp. 242 ff.
    virtual ~Distribution(){}

protected:
    // the vector of responses
    const AVector responses;

    // the vector of weights
    const AVector weights;
};
|
| 55 | ||
| 56 |
// ***************************************************************************************************// |
|
| 57 | ||
| 58 |
// The binomial distribution
class Binomial : public Distribution
{
public:
    // ctr
    Binomial(const AVector& responses,
             const AVector& weights) :
        Distribution(responses,
                     weights)
    {
    }

    // variance function: V(mu) = mu * (1 - mu)
    double
    variance(double mu) const
    {
        return mu * (1.0 - mu);
    }

    // loglikelihood (defined elsewhere)
    double
    loglik(const double *means) const;
};
|
| 81 | ||
| 82 |
// ***************************************************************************************************// |
|
| 83 | ||
| 84 |
// The Gaussian distribution
class Gaussian : public Distribution
{
public:
    // ctr
    Gaussian(const AVector& responses,
             const AVector& weights,
             double phi) :
        Distribution(responses, weights),
        phi(phi)
    {
    }

    // variance function: constant 1 for the Gaussian family
    // (the dispersion phi enters the loglikelihood, not V(mu))
    double
    variance(double mu) const
    {
        return 1.0;
    }

    // loglikelihood (defined elsewhere; uses phi)
    double
    loglik(const double *means) const;

private:

    // the dispersion factor (variance sigma^2)
    const double phi;
};
|
| 113 | ||
| 114 |
// ***************************************************************************************************// |
|
| 115 | ||
| 116 |
// The Poisson distribution
class Poisson : public Distribution
{
public:
    // ctr
    Poisson(const AVector& responses,
            const AVector& weights) :
        Distribution(responses,
                     weights)
    {
    }

    // variance function: V(mu) = mu for the Poisson family
    double
    variance(double mu) const
    {
        return mu;
    }

    // loglikelihood (defined elsewhere)
    double
    loglik(const double *means) const;
};
|
| 139 | ||
| 140 | ||
| 141 |
#endif /* DISTRIBUTIONS_H_ */ |
| 1 |
/* |
|
| 2 |
* gpriors.h |
|
| 3 |
* |
|
| 4 |
* Created on: 15.03.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* Classes for priors on the shrinkage paramater g. |
|
| 8 |
* |
|
| 9 |
*/ |
|
| 10 | ||
| 11 |
#ifndef GPRIORS_H_ |
|
| 12 |
#define GPRIORS_H_ |
|
| 13 | ||
| 14 |
#include "rcppExport.h" |
|
| 15 |
#include "functionWraps.h" |
|
| 16 | ||
| 17 | ||
| 18 |
// ***************************************************************************************************// |
|
| 19 | ||
| 20 |
// Virtual base class for all g priors.
struct GPrior
{
    // Log prior density of the shrinkage parameter g
    virtual double
    logDens(double g) const = 0;

    // we need a virtual destructor here,
    // cf. Accelerated C++ pp. 242 ff.
    virtual ~GPrior(){}

    // add method which is only overwritten by the incomplete inverse gamma class:
    // default returns NA, signalling "no closed-form TBF marginal likelihood"
    virtual double
    getTBFLogMargLik(double residualDeviance, int df) const
    {
        return R_NaReal;
    }
};
|
| 38 | ||
| 39 |
// ***************************************************************************************************// |
|
| 40 | ||
| 41 |
// Inverse gamma g prior: g ~ IG(a, b)
class InvGammaGPrior : public GPrior
{
public:
    // ctr
    InvGammaGPrior(double a, double b) :
        a(a),
        b(b)
    {
    }

    // Log prior density: log of the IG(a, b) density,
    // -(a+1)*log(g) - b/g plus the normalizing constant a*log(b) - log Gamma(a)
    double
    logDens(double g) const
    {
        return - (a + 1.0) * log(g) - b / g + a * log(b) - Rf_lgammafn(a);
    }

private:
    // the parameters for the inverse gamma density
    const double a;
    const double b;
};
|
| 64 | ||
| 65 |
// ***************************************************************************************************// |
|
| 66 | ||
| 67 |
// Incomplete Inverse gamma g prior

class IncInvGammaGPrior : public GPrior
{
public:
    // ctr
    IncInvGammaGPrior(double a, double b) :
        a(a),
        b(b)
    {
    }

    // Log prior density (defined elsewhere)
    double
    logDens(double g) const;

    // for this class we have a closed form for the log marginal likelihood
    // resulting from the TBF approach
    // (overrides the NA-returning default of GPrior)
    double
    getTBFLogMargLik(double residualDeviance, int df) const;

private:
    // the parameters for the inverse gamma density
    const double a;
    const double b;
};
|
| 93 | ||
| 94 |
// ***************************************************************************************************// |
|
| 95 | ||
| 96 |
// Hyper-g prior: p(g) = (a - 2) / 2 * (1 + g)^(-a / 2)
// (requires a > 2 for a proper density — not checked here)
class HypergPrior : public GPrior
{
public:
    // ctr
    HypergPrior(double a) :
        a(a)
    {
    }

    // Log prior density
    double
    logDens(double g) const
    {
        return log(a - 2.0) - M_LN2 - (a / 2.0) * log1p(g);
    }

private:
    // the hyperparameter
    const double a;
};
|
| 117 | ||
| 118 |
// ***************************************************************************************************// |
|
| 119 | ||
| 120 |
// Custom g-prior: delegates the log density evaluation to a
// user-supplied R function.
class CustomGPrior : public GPrior
{
public:
    // ctr: wrap the R function passed in from the R side
    CustomGPrior(SEXP R_function) :
        wrappedRfunction(R_function)
    {
    }

    // Log prior density: one R callback per evaluation
    // (NOTE: this is comparatively expensive; it sits in inner loops)
    double
    logDens(double g) const
    {
        return wrappedRfunction(g);
    }

private:
    // the wrapped R function
    const RFunction wrappedRfunction;
};
|
| 141 | ||
| 142 |
// ***************************************************************************************************// |
|
| 143 | ||
| 144 | ||
| 145 |
#endif /* GPRIORS_H_ */ |
| 1 |
/* |
|
| 2 |
* links.h |
|
| 3 |
* |
|
| 4 |
* Created on: 15.03.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* Classes for GLM link functions. |
|
| 8 |
* |
|
| 9 |
*/ |
|
| 10 | ||
| 11 |
#ifndef LINKS_H_ |
|
| 12 |
#define LINKS_H_ |
|
| 13 | ||
| 14 |
#include "rcppExport.h" |
|
| 15 | ||
| 16 |
// clamping thresholds for the linear predictor eta, mirroring
// R/src/library/stats/src/family.c: outside [MTHRESH, THRESH] the
// inverse-link values are pinned to avoid overflow in exp(eta)
static const double THRESH = 30.;
static const double MTHRESH = -30.;
// 1 / machine epsilon: stand-in for "infinitely large" exp(eta) beyond THRESH
static const double INVEPS = 1 / DBL_EPSILON;
|
| 19 | ||
| 20 | ||
| 21 |
// ***************************************************************************************************// |
|
| 22 | ||
| 23 |
// The general virtual Link class, which includes
// the link function itself,
// the inverse link function (or response function)
// and the derivative of the response function.
class Link
{
public:
    // the link function itself: eta = linkfun(mu)
    virtual double
    linkfun(double mu) const = 0;

    // the inverse link function (or response function): mu = linkinv(eta)
    virtual double
    linkinv(double eta) const = 0;

    // and the derivative of the response function: d mu / d eta
    virtual double
    mu_eta(double eta) const = 0;

    // we need a virtual destructor here,
    // cf. Accelerated C++ pp. 242 ff.
    virtual ~Link(){}
};
|
| 46 | ||
| 47 |
// ***************************************************************************************************// |
|
| 48 | ||
| 49 |
// the Logit link class |
|
| 50 |
// inspiration taken from R/src/library/stats/src/family.c, |
|
| 51 |
// because for numerical stability of the IWLS we REALLY NEED the thresholds... |
|
| 52 |
class LogitLink : public Link |
|
| 53 |
{
|
|
| 54 |
private: |
|
| 55 | ||
| 56 |
/** |
|
| 57 |
* Evaluate x/(1 - x). An inline function is used so that x is |
|
| 58 |
* evaluated once only. |
|
| 59 |
*/ |
|
| 60 | 192x |
static double x_d_omx(double x) |
| 61 |
{
|
|
| 62 | 192x |
if (x < 0 || x > 1) |
| 63 | ! |
Rf_error("Value %f out of range (0, 1)", x);
|
| 64 | 192x |
return x / (1 - x); |
| 65 |
} |
|
| 66 | ||
| 67 |
/** |
|
| 68 |
* Evaluate x/(1 + x). An inline function is used so that x is |
|
| 69 |
* evaluated once only. |
|
| 70 |
*/ |
|
| 71 | 594784608x |
static double x_d_opx(double x) |
| 72 |
{
|
|
| 73 | 594784608x |
return x / (1 + x); |
| 74 |
} |
|
| 75 | ||
| 76 |
public: |
|
| 77 |
// logit |
|
| 78 |
double |
|
| 79 | 192x |
linkfun(double mu) const |
| 80 |
{
|
|
| 81 | 192x |
return log(x_d_omx(mu)); |
| 82 |
} |
|
| 83 | ||
| 84 |
// inverse logit |
|
| 85 |
double |
|
| 86 | 594784608x |
linkinv(double eta) const |
| 87 |
{
|
|
| 88 |
double tmp = (eta < MTHRESH) ? DBL_EPSILON : |
|
| 89 |
((eta > THRESH) ? INVEPS : exp(eta)); |
|
| 90 | 594784608x |
return x_d_opx(tmp); |
| 91 |
} |
|
| 92 | ||
| 93 |
// function(eta) h'(eta) where h is the inverse logit |
|
| 94 |
double |
|
| 95 | 365377696x |
mu_eta(double eta) const |
| 96 |
{
|
|
| 97 | 365377696x |
double opexp = 1 + exp(eta); |
| 98 |
double ret = (eta > THRESH || eta < MTHRESH) ? DBL_EPSILON : |
|
| 99 | 365377696x |
exp(eta) / (opexp * opexp); |
| 100 | 365377696x |
return ret; |
| 101 |
} |
|
| 102 |
}; |
|
| 103 | ||
| 104 | ||
| 105 |
// ***************************************************************************************************// |
|
| 106 | ||
| 107 |
// the Cauchit link class
class CauchitLink : public Link
{
public:
    // ctr: thresh = - qcauchy(DBL_EPSILON), the |eta| beyond which the
    // Cauchy cdf is numerically 0 or 1
    CauchitLink() :
        thresh(- Rf_qcauchy(DBL_EPSILON, 0.0, 1.0, 1, 0))
    {
    }

    // cauchit == cauchy quantile
    double
    linkfun(double mu) const
    {
        return Rf_qcauchy(mu, 0.0, 1.0, 1, 0);
    }

    // inverse cauchit == cauchy cdf, with eta clamped to [-thresh, thresh]
    // compare in R: binomial("cauchit")$linkinv
    double
    linkinv(double eta) const
    {
        eta = fmin(fmax(eta, - thresh), thresh);
        return Rf_pcauchy(eta, 0.0, 1.0, 1, 0);
    }

    // cauchy density, floored at DBL_EPSILON to keep IWLS weights positive
    double
    mu_eta(double eta) const
    {
        return fmax(Rf_dcauchy(eta, 0.0, 1.0, 0), DBL_EPSILON);
    }

private:
    const double thresh; // clamping bound for the linear predictor
};
|
| 143 | ||
| 144 |
// ***************************************************************************************************// |
|
| 145 | ||
| 146 |
// the Probit link class
class ProbitLink : public Link
{
public:
    // ctr: thresh = - qnorm(DBL_EPSILON), the |eta| beyond which the
    // normal cdf is numerically 0 or 1
    ProbitLink() :
        thresh(- Rf_qnorm5(DBL_EPSILON, 0.0, 1.0, 1, 0))
    {
    }

    // probit == normal quantile
    double
    linkfun(double mu) const
    {
        return Rf_qnorm5(mu, 0.0, 1.0, 1, 0);
    }

    // inverse probit == normal cdf, with eta clamped to [-thresh, thresh]
    // compare in R: binomial("probit")$linkinv
    double
    linkinv(double eta) const
    {
        eta = fmin(fmax(eta, - thresh), thresh);
        return Rf_pnorm5(eta, 0.0, 1.0, 1, 0);
    }

    // normal density, floored at DBL_EPSILON to keep IWLS weights positive
    double
    mu_eta(double eta) const
    {
        return fmax(Rf_dnorm4(eta, 0.0, 1.0, 0), DBL_EPSILON);
    }

private:
    const double thresh; // clamping bound for the linear predictor
};
|
| 182 | ||
| 183 |
// ***************************************************************************************************// |
|
| 184 | ||
| 185 |
// the complementary log-log link class
class CloglogLink : public Link
{
public:
    // cloglog: eta = log(-log(1 - mu))
    double
    linkfun(double mu) const
    {
        return log(- log(1.0 - mu));
    }

    // inverse cloglog: 1 - exp(-exp(eta)), via expm1 for accuracy,
    // clamped into [DBL_EPSILON, 1 - DBL_EPSILON]
    // compare in R: binomial("cloglog")$linkinv
    double
    linkinv(double eta) const
    {
        return fmax(fmin(- expm1(- exp(eta)), 1 - DBL_EPSILON), DBL_EPSILON);
    }

    // derivative exp(eta - exp(eta)); eta capped at 700 to avoid overflow,
    // result floored at DBL_EPSILON
    // compare in R: binomial("cloglog")$mu.eta
    double
    mu_eta(double eta) const
    {
        eta = fmin(eta, 700.0);
        return fmax(exp(eta) * exp(- exp(eta)), DBL_EPSILON);
    }
};
|
| 211 | ||
| 212 |
// ***************************************************************************************************// |
|
| 213 | ||
| 214 |
// the inverse link class
class InverseLink : public Link
{
public:
    // inverse is the link: eta = 1 / mu
    double
    linkfun(double mu) const
    {
        return 1.0 / mu;
    }

    // and also the response function: mu = 1 / eta
    double
    linkinv(double eta) const
    {
        return 1.0 / eta;
    }

    // finally the response derivative: d mu / d eta = -1 / eta^2
    // (no guard against eta == 0 — mirrors the other links' reliance on
    // the IWLS staying inside the valid range)
    double
    mu_eta(double eta) const
    {
        return - 1.0 / eta / eta;
    }
};
|
| 239 | ||
| 240 |
// ***************************************************************************************************// |
|
| 241 | ||
| 242 |
// the log link class |
|
| 243 |
class LogLink : public Link |
|
| 244 |
{
|
|
| 245 |
public: |
|
| 246 |
// log is the link |
|
| 247 |
double |
|
| 248 | ! |
linkfun(double mu) const |
| 249 |
{
|
|
| 250 | ! |
return log(mu); |
| 251 |
} |
|
| 252 | ||
| 253 |
// so the response function is exp |
|
| 254 |
double |
|
| 255 | ! |
linkinv(double eta) const |
| 256 |
{
|
|
| 257 | ! |
return fmax(exp(eta), DBL_EPSILON); |
| 258 |
} |
|
| 259 | ||
| 260 |
// finally the response derivative is also exp |
|
| 261 |
double |
|
| 262 | ! |
mu_eta(double eta) const |
| 263 |
{
|
|
| 264 | ! |
return fmax(exp(eta), DBL_EPSILON); |
| 265 |
} |
|
| 266 |
}; |
|
| 267 | ||
| 268 |
// ***************************************************************************************************// |
|
| 269 | ||
| 270 |
// the identity link class |
|
| 271 |
class IdentityLink : public Link |
|
| 272 |
{
|
|
| 273 |
public: |
|
| 274 |
// identity is the link ... |
|
| 275 |
double |
|
| 276 | 1632x |
linkfun(double mu) const |
| 277 |
{
|
|
| 278 | 1632x |
return mu; |
| 279 |
} |
|
| 280 | ||
| 281 |
// ... and response function |
|
| 282 |
double |
|
| 283 | 816x |
linkinv(double eta) const |
| 284 |
{
|
|
| 285 | 816x |
return eta; |
| 286 |
} |
|
| 287 | ||
| 288 |
// finally the response derivative is 1 |
|
| 289 |
double |
|
| 290 | 816x |
mu_eta(double eta) const |
| 291 |
{
|
|
| 292 | 816x |
return 1.0; |
| 293 |
} |
|
| 294 |
}; |
|
| 295 |
#endif /* LINKS_H_ */ |
| 1 |
/* |
|
| 2 |
* rcppExport.h |
|
| 3 |
* |
|
| 4 |
* Created on: 13.10.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef RCPPEXPORT_H_ |
|
| 9 |
#define RCPPEXPORT_H_ |
|
| 10 | ||
| 11 |
#include <RcppCommon.h> |
|
| 12 | ||
| 13 |
// load the library defining the templated class |
|
| 14 |
#include <set> |
|
| 15 | ||
| 16 |
// declaring the partial specialization
// (forward declaration only -- the definition appears further below, after
// <RcppArmadillo.h> has been included, so that Rcpp's conversion machinery
// picks up the specialization for std::set<T>)
namespace Rcpp
{
namespace traits
{
template <typename T> class Exporter<std::set<T> >;
}
}
|
| 24 | ||
| 25 | ||
| 26 |
// this must appear after the specialization, |
|
| 27 |
// otherwise the specialization will not be seen by Rcpp types |
|
| 28 |
#include <RcppArmadillo.h> |
|
| 29 | ||
| 30 | ||
| 31 |
// only now the implementation of the new Exporter class:
namespace Rcpp
{
namespace traits
{
// Exporter specialization converting an R vector (SEXP) into a std::set<T>,
// which enables Rcpp::as<std::set<T> >(sexp) for set-valued arguments.
// NOTE(review): the stored member is an IntegerVector regardless of T, so
// this effectively assumes integer-valued input (i.e. std::set<int>) --
// confirm before instantiating with other element types.
template <typename T> class Exporter<std::set<T> >
{
public:
    // we need a constructor taking an SEXP
    Exporter(SEXP s) :
    _x(s)
    {
    }

    // and we need a method called "get" that returns an instance of the wished type:
    inline std::set<T>
    get()
    {
        // allocate return set
        std::set<T> x;

        // insert all elements in _x into the return set x:
        // (duplicates collapse and elements end up sorted, per std::set semantics)
        for (R_len_t i = 0; i < _x.size(); ++i)
        {
            x.insert(_x[i]);
        }

        // return the result
        return x;
    }

private:
    // the underlying R integer vector holding the elements to convert
    Rcpp::IntegerVector _x;
};
}
}
|
| 67 | ||
| 68 | ||
| 69 |
#endif /* RCPPEXPORT_H_ */ |
| 1 |
#ifndef SUM_H_ |
|
| 2 |
#define SUM_H_ |
|
| 3 | ||
| 4 |
// Source: http://oldmill.uchicago.edu/~wilder/Code/sum/ |
|
| 5 | ||
| 6 |
#include <cmath> |
|
| 7 |
#include <vector> |
|
| 8 | ||
| 9 |
using std::vector; |
|
| 10 | ||
| 11 |
//========================================================================
// Kahan's condensed (compensated) summation: accumulates the elements of
// v while carrying the rounding error of each addition forward into the
// next one.  Far more accurate than a naive sum, although heavy
// cancellation can still defeat it -- see modified_deflation below for
// those cases.  Returns 0.0 for an empty vector.

template<class T>
T
condensed_summation(const vector<T>& v)
{
    T total = 0.0, carry = 0.0;
    for (typename vector<T>::size_type k = 0; k < v.size(); ++k)
    {
        // fold the previous rounding error into the incoming element,
        // then recover the new error exactly: (previous - total) is the
        // negated rounded part, so adding augmented leaves the residue.
        T previous = total;
        T augmented = v[k] + carry;
        total = previous + augmented;
        carry = (previous - total) + augmented;
    }
    return total;
}
|
| 32 | ||
| 33 |
//========================================================================
// The modified deflation algorithm of Anderson.  It is reasonably fast,
// and should give the correct result as it is difficult if not impossible
// to do better without increasing the precision of the variables.  The
// portion of the algorithm that handles potentially infinite loops has
// been modified as the original version did not always work in my tests.
// I believe the failures were due to errors in g++ optimizations and
// also believe that my code still has an error.

// Accurately sums the elements of v by repeatedly "deflating" pairs of
// positive and negative elements (cancelling them against each other and
// retaining exact error terms) until the remaining positive and negative
// partial sums no longer cancel, then finishing with compensated summation.
// Falls back to condensed_summation for fewer than 3 elements.
template<class T>
T
modified_deflation(const vector<T>& v)
{
    if (v.size() < 3)
        return condensed_summation(v);

    // Set up several vectors:
    // vp holds positive elements, vn negative ones, e exact error terms.
    vector<T> vp;
    vector<T> vn;
    vector<T> e;

    // we do not need to do this:
    // vp.reserve(v.size());
    // vn.reserve(v.size());
    // e.reserve(v.size());

    // Initialize vectors of negative and positive elements of v
    // (exact zeros are simply dropped -- they cannot affect the sum).
    for (typename vector<T>::const_iterator
            i = v.begin();
            i != v.end();
            ++i)
        if (* i < 0.0)
            vn.push_back(* i);
        else if (* i > 0.0)
            vp.push_back(* i);


    // immediately return 0 if there are no negative or positive elements
    if(vn.empty() && vp.empty())
        return static_cast<T>(0.0);

    // now we are sure there is at least one non-zero element,
    // and can start with the summation algorithm.
    // (this also guarantees sp - sn > 0 below, since sp >= 0 >= sn and
    // they are never both zero, so the conditioning test cannot divide by 0)

    T a, b, sum, error, sp, sn;

    bool well_conditioned = false;
    while (! well_conditioned)
    {
        // Deflate the last elements of vp and vn.
        while (! vp.empty() && ! vn.empty())
        {
            a = vp.back();
            vp.pop_back();
            b = vn.back();
            vn.pop_back();
            sum = a + b;
            // error term of the addition, exact by Knuth/Kahan two-sum logic
            error = (a - sum) + b;
            if (sum == a)
            { // |a| >> |b|: b was swallowed entirely; split a in half so the
              // smaller pieces can still interact with b on a later pass
                T tmp1 = a / 2.0;
                T tmp2 = a - tmp1;
                vp.push_back(tmp2);
                vp.push_back(tmp1);
                vn.push_back(b);
            }
            else if (sum == b)
            { // |b| >> |a|: symmetric case, split b instead
                T tmp1 = b / 2.0;
                T tmp2 = b - tmp1;
                vp.push_back(a);
                vn.push_back(tmp2);
                vn.push_back(tmp1);
            }
            else
            {
                // genuine cancellation happened: file the (signed) result
                // back into the matching stack and keep the exact error
                if (sum < 0.0)
                    vn.push_back(sum);
                else if (sum > 0.0)
                    vp.push_back(sum);

                if (error != 0.0)
                    e.push_back(error);
            }
        }

        // Put the error terms back in the vp and vn arrays.
        for (typename vector<T>::iterator
                i = e.begin();
                i != e.end();
                ++i)
        {
            if (* i < 0.0)
                vn.push_back(* i);
            else if (* i > 0.0)
                vp.push_back(* i);
        }

        e.clear();

        // Check that the sums in vp and vn are well-conditioned:
        // when no further cancellation is possible, |sp + sn| / |sp - sn|
        // rounds to exactly 1.  (Note one of vp/vn is empty here, so one
        // of sp/sn is 0 and the test normally succeeds immediately.)
        sp = condensed_summation(vp);
        sn = condensed_summation(vn);
        well_conditioned = (std::abs((sp + sn) / (sp - sn)) == 1.0);
    }

    // concatenate the remaining (same-signed, hence cancellation-free)
    // pieces and sum them with the compensated algorithm
    vector<T> vnew;
    vnew.reserve(vp.size() + vn.size());

    vnew.insert(vnew.end(), vp.begin(), vp.end());
    vnew.insert(vnew.end(), vn.begin(), vn.end());

    return condensed_summation(vnew);

} // modified_deflation
| 148 | ||
| 149 | ||
| 150 |
#endif /*SUM_H_*/ |
| 1 |
/* |
|
| 2 |
* design.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 09.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#include "design.h" |
|
| 9 |
#include "fpUcHandling.h" |
|
| 10 |
#include "dataStructure.h" |
|
| 11 | ||
| 12 |
#include "rcppExport.h" |
|
| 13 |
#include "types.h" |
|
| 14 | ||
| 15 |
#include <algorithm> |
|
| 16 | ||
| 17 | ||
| 18 |
// get different concatenated columns of matrix |
|
| 19 |
AMatrix |
|
| 20 | 23505x |
getMultipleCols(const AMatrix& m, |
| 21 |
const PosIntVector& s) // expect that the column numbers s are 1-based! |
|
| 22 |
{
|
|
| 23 | 23505x |
AMatrix ret(m.n_rows, s.size()); |
| 24 | ||
| 25 | 23505x |
PosIntVector::size_type cols = 0; // invariant: about to process column nr. "cols" |
| 26 | 55440x |
for (PosIntVector::const_iterator i = s.begin(); i != s.end(); i++) |
| 27 |
{
|
|
| 28 | 95805x |
ret.col(cols++) = m.col(*i - 1); // so we must subtract 1 here |
| 29 |
} |
|
| 30 | ||
| 31 | 23505x |
return ret; |
| 32 |
} |
|
| 33 | ||
| 34 | ||
| 35 |
// ***************************************************************************************************// |
|
| 36 | ||
| 37 |
// build Fp basis matrix from vector, power indices and power set for one covariate
//
// tcols holds the pre-computed Box-Tidwell transforms of the covariate, one
// column per power index; powerinds is the (sorted) multiset of power indices
// for this FP term.  Each output column is centered (mean subtracted).
// Repeated power indices produce the usual FP "log-multiplied" columns.
// NOTE(review): the data parameter is not referenced anywhere in this body --
// presumably kept for signature symmetry; confirm before removing.
AMatrix
getFpMatrix(const std::vector<AVector>& tcols,
            const Powers& powerinds,
            const DataValues& data)
{
    const int logInd = 3; // this index corresponds to power = 0, i.e. log.
    // number of observations; assumes tcols has at least 2 entries
    // (index 1 is used only for its length) -- TODO confirm invariant
    const int nrow = tcols.at(1).n_elem;

    // get the log column
    const AVector& logColumn = tcols.at(logInd);

    // this will be the returned matrix
    AMatrix ret(nrow, powerinds.size());

    // start recursion:
    // lastInd/lastCol track the previously processed power.  Initializing
    // lastInd to logInd with a ones column makes a leading log power fall
    // into the "repeated" branch, correctly yielding ones % log == log.
    int lastInd = logInd;
    AVector lastCol = arma::ones<AVector>(nrow);

    // there is at least one power present
    Powers::size_type cols = 0; // invariant: about to process column number "cols"

    for (Powers::const_iterator
            now = powerinds.begin();
            now != powerinds.end();
            now++)
    {
        if (*now == lastInd)
        { // repeated powers case:

            // elementwise multiplication (Schur product):
            lastCol = lastCol % logColumn;
        }
        else
        { // normal case

            lastInd = *now;
            lastCol = tcols.at(lastInd);
        }
        // center the column
        ret.col(cols++) = lastCol - arma::mean(lastCol);
    }

    return ret;
}
|
| 82 | ||
| 83 |
// ***************************************************************************************************// |
|
| 84 | ||
| 85 |
// construct centered design matrix including intercept for the model
// optionally the intercept column is not included
//
// Columns are laid out in the fixed order: [intercept] | FP blocks | UC
// blocks | fix blocks.  FP columns come from getFpMatrix (centered there);
// UC and fix columns are picked out of the pre-centered design in data.
AMatrix
getDesignMatrix(const ModelPar &mod,
                const DataValues &data,
                const FpInfo &fpInfo,
                const UcInfo& ucInfo,
                const FixInfo& fixInfo,
                bool includeIntercept)
{
    // total number of columns
    int nColumns = mod.size(ucInfo, fixInfo);
    if(includeIntercept)
    {
        ++nColumns;
    }

    // initialize the return matrix
    AMatrix ret(data.nObs, nColumns);

    // invariant: nextColumn is the next column to be written
    PosInt nextColumn = 0;

    // start with the intercept column?
    if(includeIntercept)
    {
        ret.col(0) = data.onesVector;
        ++nextColumn;
    }

    // go on with centered fp matrices
    for (PosInt i = 0; i != fpInfo.nFps; i++)
    {
        Powers powersi = mod.fpPars.at(i);

        // an empty power multiset means this FP term is excluded from the model
        if (! powersi.empty())
        {
            // what is the end column in the return matrix?
            PosInt endColumn = nextColumn + powersi.size() - 1;

            // insert the centered FP matrix into the return matrix
            ret.cols(nextColumn, endColumn) = getFpMatrix(fpInfo.tcols.at(i), powersi, data);

            // correct invariant
            nextColumn = endColumn + 1;
        }
    }

    // centered uc matrices
    for(IntSet::const_iterator
            g = mod.ucPars.begin();
            g != mod.ucPars.end();
            ++g)
    {
        // the C index is (ucPars holds 1-based group numbers)
        Int i = *g - 1;

        // what is the column list for this covariate?
        PosIntVector thisColList = ucInfo.ucColList.at(i);

        // what is the end column in the return matrix?
        PosInt endColumn = nextColumn + thisColList.size() - 1;

        // insert the centered UC matrix into the return matrix
        ret.cols(nextColumn, endColumn) = getMultipleCols(data.centeredDesign, thisColList);

        // correct invariant
        nextColumn = endColumn + 1;
    }

    // centered fix matrices
    for(IntSet::const_iterator
            g = mod.fixPars.begin();
            g != mod.fixPars.end();
            ++g)
    {
        // the C index is (fixPars holds 1-based group numbers)
        Int i = *g - 1;

        // what is the column list for this covariate?
        PosIntVector thisColList = fixInfo.fixColList.at(i);

        // what is the end column in the return matrix?
        PosInt endColumn = nextColumn + thisColList.size() - 1;

        // insert the centered fix matrix into the return matrix
        ret.cols(nextColumn, endColumn) = getMultipleCols(data.centeredDesign, thisColList);

        // correct invariant
        nextColumn = endColumn + 1;
    }


    return ret;
}
|
| 181 | ||
| 182 |
// ***************************************************************************************************// |
|
| 183 | ||
| 184 | ||
| 185 | ||
| 186 |
// End of file. |
| 1 |
/* |
|
| 2 |
* distributions.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 15.03.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* Implementation of classes for exponential family distributions, |
|
| 8 |
* i.e. functions which should not be inlined by the compiler. |
|
| 9 |
* |
|
| 10 |
*/ |
|
| 11 | ||
| 12 |
#include "distributions.h" |
|
| 13 |
#include <string> |
|
| 14 |
#include "types.h" |
|
| 15 |
#include "rcppExport.h" |
|
| 16 | ||
| 17 |
// ***************************************************************************************************// |
|
| 18 | ||
| 19 |
// Helper for the binomial and Poisson loglikelihoods: computes
// y * log(y / mu), with the conventional limit value 0 at y == 0.
// See R/src/library/stats/src/family.c for the original R source code.
static inline double
y_log_y(double y, double mu)
{
    if (y > 0.0)
    {
        return y * log(y / mu);
    }
    return 0.0;
}
|
| 26 | ||
| 27 |
// ***************************************************************************************************// |
|
| 28 | ||
| 29 | ||
| 30 |
// binomial loglikelihood
// compare in R: binomial()$dev.resids
//
// Accumulates the weighted half-deviance contributions
// y*log(y/mu) + (1-y)*log((1-y)/(1-mu)) and returns their negative,
// i.e. the loglikelihood up to an additive constant.
// means must point to (at least) responses.n_elem fitted means.
double
Binomial::loglik(const double *means) const
{
    double ret = 0.0;

    for(PosInt i = 0; i < responses.n_elem; ++i, ++means)
    {
        ret += weights(i) * (y_log_y(responses(i), *means) + y_log_y(1.0 - responses(i), 1.0 - *means));
    }

    // negate: ret is (half) the deviance
    return - ret;
}
|
| 44 | ||
| 45 |
// ***************************************************************************************************// |
|
| 46 | ||
| 47 | ||
| 48 |
// Gaussian loglikelihood
// compare in R: gaussian()$dev.resids
//
// Accumulates the weighted squared residuals and returns
// -0.5 * RSS / phi (phi is the dispersion), i.e. the loglikelihood
// up to an additive constant.  means must point to (at least)
// responses.n_elem fitted means.
double
Gaussian::loglik(const double *means) const
{
    double ret = 0.0;

    for(PosInt i = 0; i < responses.n_elem; ++i, ++means)
    {
        ret += weights(i) * (responses(i) - *means) * (responses(i) - *means);
    }

    return - 0.5 * ret / phi;
}
|
| 62 | ||
| 63 |
// ***************************************************************************************************// |
|
| 64 | ||
| 65 | ||
| 66 |
// Poisson loglikelihood
// compare in R: poisson()$dev.resids
//
// Accumulates weights(i) * (y - mu - y*log(y/mu)), which equals minus the
// half-deviance contribution, and returns the sum directly -- the
// loglikelihood up to an additive constant.  means must point to
// (at least) responses.n_elem fitted means.
double
Poisson::loglik(const double *means) const
{
    double ret = 0.0;

    for(PosInt i = 0; i < responses.n_elem; ++i, ++means)
    {
        ret += weights(i) * (responses(i) - *means - y_log_y(responses(i), *means));
    }

    return ret;
}
| 1 |
/* |
|
| 2 |
* evalZdensity.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 16.05.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 | ||
| 9 |
#include "rcppExport.h" |
|
| 10 |
#include "dataStructure.h" |
|
| 11 |
#include "types.h" |
|
| 12 |
#include "zdensity.h" |
|
| 13 |
#include "fpUcHandling.h" |
|
| 14 |
#include <stdexcept> |
|
| 15 | ||
| 16 |
using namespace Rcpp; |
|
| 17 | ||
| 18 |
// ***************************************************************************************************// |
|
| 19 | ||
| 20 | ||
| 21 |
// R call is: |
|
| 22 |
// |
|
| 23 |
// samples <- .External(cpp_evalZdensity, |
|
| 24 |
// config, |
|
| 25 |
// attrs$data, |
|
| 26 |
// attrs$fpInfos, |
|
| 27 |
// attrs$ucInfos, |
|
| 28 |
// attrs$fixInfos, |
|
| 29 |
// attrs$distribution, |
|
| 30 |
// options) |
|
| 31 |
// [[Rcpp::export]] |
|
| 32 |
SEXP |
|
| 33 | 13x |
cpp_evalZdensity(List rcpp_config, List rcpp_data, List rcpp_fpInfos, List rcpp_ucInfos, |
| 34 |
List rcpp_fixInfos, List rcpp_distribution, List rcpp_options) |
|
| 35 |
{
|
|
| 36 |
// ---------------------------------------------------------------------------------- |
|
| 37 |
// extract arguments |
|
| 38 |
// ---------------------------------------------------------------------------------- |
|
| 39 | ||
| 40 |
// r_interface = CDR(r_interface); |
|
| 41 |
// List rcpp_config(CAR(r_interface)); |
|
| 42 |
// |
|
| 43 |
// r_interface = CDR(r_interface); |
|
| 44 |
// List rcpp_data(CAR(r_interface)); |
|
| 45 |
// |
|
| 46 |
// r_interface = CDR(r_interface); |
|
| 47 |
// List rcpp_fpInfos(CAR(r_interface)); |
|
| 48 |
// |
|
| 49 |
// r_interface = CDR(r_interface); |
|
| 50 |
// List rcpp_ucInfos(CAR(r_interface)); |
|
| 51 |
// |
|
| 52 |
// r_interface = CDR(r_interface); |
|
| 53 |
// List rcpp_fixInfos(CAR(r_interface)); |
|
| 54 |
// |
|
| 55 |
// r_interface = CDR(r_interface); |
|
| 56 |
// List rcpp_distribution(CAR(r_interface)); |
|
| 57 |
// |
|
| 58 |
// r_interface = CDR(r_interface); |
|
| 59 |
// List rcpp_options(CAR(r_interface)); |
|
| 60 | ||
| 61 |
// ---------------------------------------------------------------------------------- |
|
| 62 |
// unpack the R objects |
|
| 63 |
// ---------------------------------------------------------------------------------- |
|
| 64 | ||
| 65 |
// data: |
|
| 66 | ||
| 67 | 26x |
const NumericMatrix n_x = rcpp_data["x"]; |
| 68 | 13x |
const AMatrix x(n_x.begin(), n_x.nrow(), |
| 69 | 13x |
n_x.ncol()); |
| 70 | ||
| 71 | 26x |
const NumericMatrix n_xCentered = rcpp_data["xCentered"]; |
| 72 | 13x |
const AMatrix xCentered(n_xCentered.begin(), n_xCentered.nrow(), |
| 73 | 13x |
n_xCentered.ncol()); |
| 74 | ||
| 75 | 26x |
const NumericVector n_y = rcpp_data["y"]; |
| 76 | 13x |
const AVector y(n_y.begin(), n_y.size()); |
| 77 | ||
| 78 | 26x |
const IntVector censInd = as<IntVector>(rcpp_data["censInd"]); |
| 79 | ||
| 80 |
// FP configuration: |
|
| 81 | ||
| 82 |
// vector of maximum fp degrees |
|
| 83 | 26x |
const PosIntVector fpmaxs = as<PosIntVector>(rcpp_fpInfos["fpmaxs"]); |
| 84 |
// corresponding vector of fp column indices |
|
| 85 | 26x |
const PosIntVector fppos = rcpp_fpInfos["fppos"]; |
| 86 |
// corresponding vector of power set cardinalities |
|
| 87 | 26x |
const PosIntVector fpcards = rcpp_fpInfos["fpcards"]; |
| 88 |
// names of fp terms |
|
| 89 | 26x |
const StrVector fpnames = rcpp_fpInfos["fpnames"]; |
| 90 | ||
| 91 | ||
| 92 |
// UC configuration: |
|
| 93 | ||
| 94 | 26x |
const PosIntVector ucIndices = rcpp_ucInfos["ucIndices"]; |
| 95 | 26x |
List rcpp_ucColList = rcpp_ucInfos["ucColList"]; |
| 96 | ||
| 97 | 13x |
std::vector<PosIntVector> ucColList; |
| 98 |
for (R_len_t i = 0; i != rcpp_ucColList.length(); ++i) |
|
| 99 |
{
|
|
| 100 | ! |
ucColList.push_back(as<PosIntVector>(rcpp_ucColList[i])); |
| 101 |
} |
|
| 102 | ||
| 103 |
// fixed covariate configuration: |
|
| 104 |
|
|
| 105 | 26x |
const PosIntVector fixIndices = rcpp_fixInfos["fixIndices"]; |
| 106 | 26x |
List rcpp_fixColList = rcpp_fixInfos["fixColList"]; |
| 107 |
|
|
| 108 | 13x |
std::vector<PosIntVector> fixColList; |
| 109 |
for (R_len_t i = 0; i != rcpp_fixColList.length(); ++i) |
|
| 110 |
{
|
|
| 111 | ! |
fixColList.push_back(as<PosIntVector>(rcpp_fixColList[i])); |
| 112 |
} |
|
| 113 |
|
|
| 114 | ||
| 115 |
// distributions info: |
|
| 116 | ||
| 117 | 13x |
const double nullModelLogMargLik = as<double>(rcpp_distribution["nullModelLogMargLik"]); |
| 118 | 13x |
const double nullModelDeviance = as<double>(rcpp_distribution["nullModelDeviance"]); |
| 119 | 13x |
const double empiricalMean = as<double>(rcpp_distribution["yMean"]); |
| 120 | 26x |
S4 rcpp_gPrior = rcpp_distribution["gPrior"]; |
| 121 | 26x |
List rcpp_family = rcpp_distribution["family"]; |
| 122 | ||
| 123 | ||
| 124 | ||
| 125 |
// options: |
|
| 126 | ||
| 127 | 26x |
const MyDoubleVector zValues = rcpp_options["zValues"]; |
| 128 |
// const bool conditional = as<bool>(rcpp_options["conditional"]); |
|
| 129 |
// const bool debug = as<bool>(rcpp_options["debug"]); |
|
| 130 |
// const bool higherOrderCorrection = as<bool>(rcpp_options["higherOrderCorrection"]); |
|
| 131 | ||
| 132 | 26x |
const Book bookkeep(as<bool>(rcpp_distribution["tbf"]), |
| 133 | 26x |
as<bool>(rcpp_distribution["doGlm"]), |
| 134 |
false, |
|
| 135 | 26x |
as<bool>(rcpp_options["conditional"]), |
| 136 |
false, //useFixedc |
|
| 137 |
100, |
|
| 138 |
false, |
|
| 139 | 26x |
as<bool>(rcpp_options["debug"]), |
| 140 | 26x |
as<std::string>(rcpp_distribution["modelPrior"]), |
| 141 |
10, |
|
| 142 |
10, |
|
| 143 |
10, |
|
| 144 |
false, |
|
| 145 | 26x |
as<bool>(rcpp_options["debug"]), |
| 146 | 91x |
as<bool>(rcpp_options["higherOrderCorrection"])); |
| 147 | ||
| 148 | ||
| 149 |
// ---------------------------------------------------------------------------------- |
|
| 150 |
// further process arguments |
|
| 151 |
// ---------------------------------------------------------------------------------- |
|
| 152 | ||
| 153 |
// data: |
|
| 154 | ||
| 155 |
// only the intercept is always included, that is fixed, in the model |
|
| 156 | 13x |
IntSet fixedCols; |
| 157 | 13x |
fixedCols.insert(1); |
| 158 | ||
| 159 |
// totalnumber is set to 0 because we do not care about it. |
|
| 160 | 13x |
const DataValues data(x, xCentered, y, censInd, 0, fixedCols); |
| 161 | ||
| 162 |
// FP configuration: |
|
| 163 | 13x |
const FpInfo fpInfo(fpcards, fppos, fpmaxs, fpnames, x); |
| 164 | ||
| 165 |
// UC configuration: |
|
| 166 | ||
| 167 |
// determine sizes of the UC groups, and the total size == maximum size reached together by all |
|
| 168 |
// UC groups. |
|
| 169 | 13x |
PosIntVector ucSizes; |
| 170 | 13x |
PosInt maxUcDim = 0; |
| 171 |
for (std::vector<PosIntVector>::const_iterator cols = ucColList.begin(); cols != ucColList.end(); ++cols) |
|
| 172 |
{
|
|
| 173 | ! |
PosInt thisSize = cols->size(); |
| 174 | ||
| 175 | ! |
maxUcDim += thisSize; |
| 176 | ! |
ucSizes.push_back(thisSize); |
| 177 |
} |
|
| 178 | 13x |
const UcInfo ucInfo(ucSizes, maxUcDim, ucIndices, ucColList); |
| 179 | ||
| 180 |
|
|
| 181 |
|
|
| 182 |
|
|
| 183 |
// fix configuration: //TODO THIS SECTION |
|
| 184 |
|
|
| 185 |
// determine sizes of the fix groups, and the total size == maximum size reached together by all |
|
| 186 |
// fix groups. |
|
| 187 | 13x |
PosIntVector fixSizes; |
| 188 | 13x |
PosInt maxFixDim = 0; |
| 189 |
for (std::vector<PosIntVector>::const_iterator cols = fixColList.begin(); cols != fixColList.end(); ++cols) |
|
| 190 |
{
|
|
| 191 | ! |
PosInt thisSize = cols->size(); |
| 192 |
|
|
| 193 | ! |
maxFixDim += thisSize; |
| 194 | ! |
fixSizes.push_back(thisSize); |
| 195 |
} |
|
| 196 | 13x |
const FixInfo fixInfo(fixSizes, maxFixDim, fixIndices, fixColList); |
| 197 |
|
|
| 198 |
|
|
| 199 |
// search configuration: |
|
| 200 |
const GlmModelConfig config(rcpp_family, nullModelLogMargLik, nullModelDeviance, as<double>(rcpp_distribution["fixedg"]), rcpp_gPrior, |
|
| 201 | 13x |
data.response, bookkeep.debug, bookkeep.useFixedc, empiricalMean, |
| 202 | 26x |
as<bool>(rcpp_distribution["empiricalgPrior"])); |
| 203 |
// config of this model: |
|
| 204 | 13x |
const ModelPar thisModelConfig(rcpp_config, fpInfo); |
| 205 | ||
| 206 | ||
| 207 |
// ---------------------------------------------------------------------------------- |
|
| 208 |
// evaluate the z density |
|
| 209 |
// ---------------------------------------------------------------------------------- |
|
| 210 | ||
| 211 |
// get negative log unnormalized z density: a function object. |
|
| 212 |
NegLogUnnormZDens negLogUnnormZDens(thisModelConfig, |
|
| 213 |
data, |
|
| 214 |
fpInfo, |
|
| 215 |
ucInfo, |
|
| 216 |
fixInfo, |
|
| 217 |
config, |
|
| 218 | 13x |
bookkeep); |
| 219 | ||
| 220 |
// evaluate it at the given z values. |
|
| 221 | 13x |
NumericVector results; |
| 222 | ||
| 223 | 1235x |
for (MyDoubleVector::const_iterator z = zValues.begin(); z != zValues.end(); ++z) |
| 224 |
{
|
|
| 225 | 1222x |
results.push_back(negLogUnnormZDens(* z)); |
| 226 |
} |
|
| 227 | ||
| 228 |
// return the results vector |
|
| 229 | 26x |
return results; |
| 230 |
} |
| 1 |
/* |
|
| 2 |
* fpUcHandling.h |
|
| 3 |
* |
|
| 4 |
* Created on: 09.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef FPUCHANDLING_H_ |
|
| 9 |
#define FPUCHANDLING_H_ |
|
| 10 | ||
| 11 |
#include "types.h" |
|
| 12 |
#include <numeric> |
|
| 13 |
#include "rcppExport.h" |
|
| 14 | ||
| 15 |
// ***************************************************************************************************// |
|
| 16 | ||
| 17 |
// small helper function to structure code: |
|
| 18 |
// get the maximum power set for a range of FP maximum degrees |
|
| 19 |
MyDoubleVector |
|
| 20 |
getMaxPowerSet(const PosIntVector& fpmaxs); |
|
| 21 | ||
| 22 |
// ***************************************************************************************************// |
|
| 23 | ||
| 24 |
// build array of vectors of ColumnVectors holding the required transformed values for the design matrices |
|
| 25 |
// do not! center the column. This is done inside getFpMatrix, because the repeated powers case cannot |
|
| 26 |
// be treated here. |
|
| 27 |
AVectorArray |
|
| 28 |
getTransformedCols(const PosIntVector& fpcards, |
|
| 29 |
const PosIntVector& fppos, |
|
| 30 |
const PosIntVector& fpmaxs, |
|
| 31 |
const AMatrix& x); |
|
| 32 | ||
| 33 | ||
| 34 |
// ***************************************************************************************************// |
|
| 35 | ||
| 36 | ||
| 37 |
struct FpInfo
{ // collects all information on fractional polynomials needed to be passed down
    PosInt nFps;              // number of FP terms (one per FP covariate)
    MyDoubleVector powerset;  // maximum power set implied by the largest FP degree
    PosIntVector fpcards;     // power set cardinality, per FP term
    PosIntVector fppos;       // design-matrix column index, per FP term
    PosIntVector fpmaxs;      // maximum FP degree, per FP term
    StrVector fpnames;        // name of each FP term
    AVectorArray tcols;       // pre-computed transformed columns, per FP covariate
    PosInt maxFpDim;          // sum of all maximum degrees

    // number of possible univariate fps for each FP?
    IntVector numberPossibleFps;

    // what is the multiset expressing a linear inclusion of a covariate?
    Powers linearPowers;


    // ctr: derives all fields from the raw configuration vectors and the
    // design matrix x (transformed columns are computed once, up front)
    FpInfo(const PosIntVector& fpcards,
           const PosIntVector& fppos,
           const PosIntVector& fpmaxs,
           const StrVector& fpnames,
           const AMatrix& x) :
        nFps(fpmaxs.size()), powerset(getMaxPowerSet(fpmaxs)), fpcards(fpcards),
        fppos(fppos), fpmaxs(fpmaxs), fpnames(fpnames), tcols(getTransformedCols(fpcards, fppos, fpmaxs, x)),
        maxFpDim(std::accumulate(fpmaxs.begin(), fpmaxs.end(), 0)),
        numberPossibleFps(),
        linearPowers()
    {
        // numbers of possible univariate fps?
        // count of multisets of each degree deg drawn from a power set of
        // size fpcards[i], summed over deg = 0 .. fpmaxs[i]
        // (multiset coefficient: choose(card - 1 + deg, deg))
        for(PosInt i=0; i != nFps; ++i)
        {
            int thisNumber = 0;
            for(PosInt deg = 0; deg <= fpmaxs[i]; ++deg)
            {
                thisNumber += Rf_choose(fpcards[i] - 1 + deg, deg);
            }
            numberPossibleFps.push_back(thisNumber);
        }

        // insert the index 5 for linear power 1
        linearPowers.insert(5);

    }

    // convert inds m into powers vector
    MyDoubleVector
    inds2powers(const Powers& m) const;

    // convert std::vector of powers into Powers object (i.e. a multiset)
    Powers
    vec2inds(const MyDoubleVector& p) const;

};
|
| 92 | ||
| 93 |
// ***************************************************************************************************// |
|
| 94 | ||
| 95 |
// collects all information on uncertain fixed form covariates groups
struct UcInfo
{
    const PosIntVector ucSizes;                  // number of design columns per UC group
    const PosInt maxUcDim;                       // total number of columns over all UC groups
    const PosIntVector ucIndices;                // group index per UC covariate
    const std::vector <PosIntVector> ucColList;  // design-matrix columns (1-based) per group
    const PosInt nUcGroups;                      // number of UC groups

    // ctr: copies the supplied configuration; the group count is derived
    // from the column list
    UcInfo(const PosIntVector& ucSizes,
           const PosInt maxUcDim,
           const PosIntVector& ucIndices,
           const std::vector<PosIntVector>& ucColList) :
        ucSizes(ucSizes), maxUcDim(maxUcDim), ucIndices(ucIndices),
        ucColList(ucColList), nUcGroups(ucColList.size())
    {
    }
};
|
| 113 | ||
| 114 |
// ***************************************************************************************************// |
|
| 115 | ||
| 116 |
// collects all information on fixed form covariates groups
// (structurally identical to UcInfo, but for covariates that are always
// included in the model)
struct FixInfo
{
    const PosIntVector fixSizes;                  // number of design columns per fix group
    const PosInt maxFixDim;                       // total number of columns over all fix groups
    const PosIntVector fixIndices;                // group index per fix covariate
    const std::vector <PosIntVector> fixColList;  // design-matrix columns (1-based) per group
    const PosInt nFixGroups;                      // number of fix groups

    // ctr: copies the supplied configuration; the group count is derived
    // from the column list
    FixInfo(const PosIntVector& fixSizes,
           const PosInt maxFixDim,
           const PosIntVector& fixIndices,
           const std::vector<PosIntVector>& fixColList) :
        fixSizes(fixSizes), maxFixDim(maxFixDim), fixIndices(fixIndices),
        fixColList(fixColList), nFixGroups(fixColList.size())
    {
    }
};
|
| 134 | ||
| 135 |
// ***************************************************************************************************// |
|
| 136 | ||
| 137 | ||
| 138 |
// return iterator of random element of myset; should be enclosed in getRNGstate() etc. |
|
| 139 |
template<class T> |
|
| 140 |
typename T::iterator |
|
| 141 | 1782944x |
discreteUniform(const T& container) |
| 142 |
{
|
|
| 143 | 1782944x |
if (container.empty()) |
| 144 |
{
|
|
| 145 | ! |
Rf_error("\ncontainer in call to discreteUniform is empty!\n");
|
| 146 |
} |
|
| 147 | ||
| 148 | 1782944x |
double u = unif_rand(); |
| 149 | ||
| 150 | 1782944x |
typename T::size_type size = container.size(); |
| 151 | 1782944x |
typename T::const_iterator i = container.begin(); |
| 152 | 1782944x |
typename T::size_type j = 1; |
| 153 | ||
| 154 | 2882048x |
while (u > 1.0 / size * j) |
| 155 |
{
|
|
| 156 | 549552x |
i++; |
| 157 | 549552x |
j++; |
| 158 |
} |
|
| 159 | ||
| 160 | 1782944x |
return i; |
| 161 |
} |
|
| 162 | ||
| 163 |
// ***************************************************************************************************// |
|
| 164 | ||
| 165 |
// get random int x with lower <= x < upper; should be enclosed in getRNGstate() etc. |
|
| 166 |
template<class INT> |
|
| 167 |
INT |
|
| 168 | 40784x |
discreteUniform(const INT& lower, const INT& upper) |
| 169 |
{
|
|
| 170 | 40784x |
if (lower >= upper) |
| 171 |
{
|
|
| 172 | ! |
Rf_error("\nlower = %d >= %d = upper in discreteUniform call\n", lower,
|
| 173 |
upper); |
|
| 174 |
} |
|
| 175 | ||
| 176 | 40784x |
double u = unif_rand(); |
| 177 | ||
| 178 | 40784x |
INT size = upper - lower; |
| 179 | 40784x |
INT ret = lower; |
| 180 | ||
| 181 | 182432x |
while (u > 1.0 / size * (ret - lower + 1)) |
| 182 |
{
|
|
| 183 | 141648x |
ret++; |
| 184 |
} |
|
| 185 | ||
| 186 | 40784x |
return ret; |
| 187 |
} |
|
| 188 | ||
| 189 |
// ***************************************************************************************************// |
|
| 190 | ||
| 191 | ||
| 192 |
// delete a number from a set:
// return a copy of "input" with "element" removed (if present).
template<class T>
typename std::set<T>
removeElement(std::set<T> input, T element)
{
    // std::set keys are unique, so erasing by key removes exactly the one
    // matching element (if any). This is equivalent to the previous manual
    // scan-and-erase loop over all elements, but O(log n) instead of O(n).
    input.erase(element);

    return input;
}
|
| 210 | ||
| 211 |
// ***************************************************************************************************// |
|
| 212 | ||
| 213 |
// construct a sequence 1:maximum as a set
// (returns the empty set when maximum < 1)
template<class T>
typename std::set<T>
constructSequence(T maximum)
{
    std::set<T> sequence;

    for (T value = 1; value <= maximum; ++value)
    {
        // values arrive in strictly increasing order, so the end()
        // position is always the correct insertion hint
        sequence.insert(sequence.end(), value);
    }

    return sequence;
}
|
| 227 | ||
| 228 |
// ***************************************************************************************************// |
|
| 229 | ||
| 230 |
// convert frequency vector into multiset |
|
| 231 |
Powers |
|
| 232 |
freqvec2Powers(IntVector& vec, const int &vecLength); |
|
| 233 | ||
| 234 |
// ***************************************************************************************************// |
|
| 235 | ||
| 236 | ||
| 237 |
#endif /* FPUCHANDLING_H_ */ |
| 1 |
/* |
|
| 2 |
* zdensity.h |
|
| 3 |
* |
|
| 4 |
* Created on: 16.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef ZDENSITY_H_ |
|
| 9 |
#define ZDENSITY_H_ |
|
| 10 | ||
| 11 |
#include "iwls.h" |
|
| 12 |
#include "coxfit.h" |
|
| 13 |
#include "types.h" |
|
| 14 | ||
| 15 |
// 21/11/2012: major update due to inclusion of TBF methodology |
|
| 16 | ||
| 17 |
// Function object returning the negative log unnormalized density of z
// for one specific model. Used as the target of the 1-d optimizers and
// the Gauss-Hermite quadrature in the marginal likelihood approximation.
class NegLogUnnormZDens {
public:
    // call the function object:
    // z is the argument,
    double
    operator()(double z);

    // constructor
    NegLogUnnormZDens(const ModelPar &mod,
                      const DataValues& data,
                      const FpInfo& fpInfo,
                      const UcInfo& ucInfo,
                      const FixInfo& fixInfo,
                      const GlmModelConfig& config,
                      // return the approximate *conditional* density f(y | z, mod) by operator()?
                      // otherwise return the approximate unnormalized *joint* density f(y, z | mod).
                      const Book& bookkeep,
                      PosInt nIter=40);

    // try to get the TBF log marginal likelihood
    double
    getTBFLogMargLik() const;

    // get the maximum log conditional marginal likelihood
    // and put the local EB estimate into zMode
    double
    getTBFMaxLogCondMargLik(double& zMode) const;

    // get residual deviance
    // (only meaningful when the TBF approach fills modResidualDeviance,
    // see the member comment below)
    double
    getResidualDeviance() const
    {
        return modResidualDeviance;
    }

    // destructor: this class owns the two heap objects below.
    // NOTE(review): raw owning pointers without a copy constructor /
    // assignment operator — copying an instance would double-delete.
    // Confirm instances are never copied, or disable copying.
    ~NegLogUnnormZDens()
    {
        delete iwlsObject;
        delete coxfitObject;
    }

private:
    // save the model reference and fp info, so we can write a nice warning message if the
    // IWLS fails for some z
    const ModelPar& mod;
    const FpInfo& fpInfo;
    const GlmModelConfig& config;
    const Book& bookkeep;

    // also save the original start linear predictor
    const AVector linPredStart;

    // pointer to an IWLS object (owned, deleted in the destructor)
    Iwls * iwlsObject;

    // pointer to a Coxfit object (owned, deleted in the destructor)
    Coxfit * coxfitObject;

    // number of IWLS iterations
    PosInt nIter;

    // the size of the model
    const int modSize;

    // the residual deviance of the model (only filled with correct value if TBF approach is used)
    double modResidualDeviance;
};
|
| 86 | ||
| 87 | ||
| 88 |
#endif /* ZDENSITY_H_ */ |
| 1 |
/* |
|
| 2 |
* fpUcHandling.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 09.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* 13/07/2015 Replace assert() with Rccp:Stop() |
|
| 8 |
*/ |
|
| 9 | ||
| 10 |
#include "fpUcHandling.h" |
|
| 11 | ||
| 12 |
//#include <cassert> |
|
| 13 |
#include <algorithm> |
|
| 14 | ||
| 15 |
#include "rcppExport.h" |
|
| 16 | ||
| 17 |
// ***************************************************************************************************// |
|
| 18 | ||
| 19 |
// small helper function to structure code: |
|
| 20 |
// get the maximum power set for a range of FP maximum degrees |
|
| 21 |
MyDoubleVector |
|
| 22 | 1368x |
getMaxPowerSet(const PosIntVector& fpmaxs) |
| 23 |
{
|
|
| 24 |
// always in the power set: |
|
| 25 |
static const double fixedpowers[] = { -2, -1, -0.5, 0, 0.5, 1, 2, 3 };
|
|
| 26 |
// corresponding indices 0 1 2 3 4 5 6 7 |
|
| 27 | ||
| 28 |
// convert to double vector |
|
| 29 | 1368x |
MyDoubleVector maxPowerset(fixedpowers, fixedpowers + 8); |
| 30 | ||
| 31 | 1368x |
if (! fpmaxs.empty()) |
| 32 |
{
|
|
| 33 |
// consider if the power set is even bigger |
|
| 34 | 240x |
const PosInt biggestMaxDegree = * std::max_element(fpmaxs.begin(), |
| 35 | 240x |
fpmaxs.end()); |
| 36 |
for (PosInt more = 4; more <= biggestMaxDegree; ++more) |
|
| 37 |
{
|
|
| 38 | ! |
maxPowerset.push_back(more); |
| 39 |
} |
|
| 40 |
} |
|
| 41 | ||
| 42 |
// return the maximum power "set" |
|
| 43 | 2736x |
return (maxPowerset); |
| 44 |
} |
|
| 45 | ||
| 46 | ||
| 47 |
// ***************************************************************************************************// |
|
| 48 | ||
| 49 |
// fpInfo // |
|
| 50 | ||
| 51 |
MyDoubleVector
FpInfo::inds2powers(const Powers& m) const
{
    // Translate a multiset of powerset indices ("Powers" object) into the
    // vector of the actual power values, by looking each index up in the
    // member powerset.
    MyDoubleVector powers;

    Powers::const_iterator ind = m.begin();
    while (ind != m.end())
    {
        powers.push_back(powerset[*ind]);
        ++ind;
    }

    return powers;
}
|
| 65 | ||
| 66 | ||
| 67 |
// convert std::vector of powers into Powers object (i.e. a multiset) of ints of the power indexes. |
|
| 68 |
Powers |
|
| 69 | 264x |
FpInfo::vec2inds(const MyDoubleVector& p) const |
| 70 |
{
|
|
| 71 | 264x |
Powers ret; |
| 72 | ||
| 73 | 264x |
for(MyDoubleVector::const_iterator j = p.begin(); |
| 74 | 384x |
j != p.end(); |
| 75 | 120x |
j++) |
| 76 |
{
|
|
| 77 |
// the index which we look for is the difference of the start of the powerset and the |
|
| 78 |
// iterator pointing to the location where the power was found. |
|
| 79 | 120x |
int index = find(powerset.begin(), powerset.end(), *j) - powerset.begin(); |
| 80 | 120x |
ret.insert(index); |
| 81 |
} |
|
| 82 | ||
| 83 | 264x |
return ret; |
| 84 |
} |
|
| 85 | ||
| 86 |
// ***************************************************************************************************// |
|
| 87 | ||
| 88 | ||
| 89 |
// Box-Tidwell transform: x^power for power != 0, and log(x) for power == 0.
static inline double
boxtidwell(double x, double power)
{
    if (power != 0)
    {
        return pow(x, power);
    }
    return log(x);
}
|
| 95 | ||
| 96 |
// ***************************************************************************************************// |
|
| 97 | ||
| 98 |
// build array of vectors of AVectors holding the required transformed values for the design matrices
// do not! center the column. This is done inside getFpMatrix, because the repeated powers case cannot
// be treated here!!
//
// fpcards: for each FP covariate, the number of powers to transform by
//          (indexes into the maximum power set; parallel to fppos)
// fppos:   1-based column positions of the FP covariates in x
// fpmaxs:  maximum FP degree of each covariate (determines the power set)
// x:       design matrix with the already shifted and scaled covariates
AVectorArray
getTransformedCols(const PosIntVector& fpcards,
                   const PosIntVector& fppos,
                   const PosIntVector& fpmaxs,
                   const AMatrix& x)
{
    // initialize the return value
    AVectorArray transformedCols;

    // get maximum powerset
    MyDoubleVector maxPowerset = getMaxPowerSet(fpmaxs);

    // process each FP term; fpcards and fppos are iterated in lockstep
    PosIntVector::const_iterator card = fpcards.begin();
    for (PosIntVector::const_iterator pos = fppos.begin(); pos != fppos.end(); ++card, ++pos)
    {
        // the original column (*pos is 1-based, hence the - 1)
        const AVector thisCol = x.col(*pos - 1);

        // start vector of columns for this FP term
        std::vector<AVector> thisFp;

        // for every possible power
        for (PosInt j = 0; j != *card; j++)
        {
            // start with original column
            AVector thisTransform = thisCol;

            // then transform it according to the power:
            for (PosInt k = 0; k < thisTransform.n_rows; ++k)
            {
                // the Box-Tidwell transform needs strictly positive input
                // (covariates are expected to be shifted/scaled already)
                if(!(thisTransform(k) > 0)) Rcpp::stop("fpUcHandling.cpp:getTransformedCols: thisTransform(k) not greater than 0");

                thisTransform(k) = boxtidwell(thisTransform(k),
                                              maxPowerset[j]);

                // guard against numerical problems in pow()/log()
                if(ISNAN(thisTransform(k))) Rcpp::stop("fpUcHandling.cpp:getTransformedCols: thisTransform(k) is NAN");
            }

            // and put it into vector of columns
            thisFp.push_back(thisTransform);
        }

        // push vector of all powers for this FP term into the array
        transformedCols.push_back(thisFp);
    }

    // return the array
    return (transformedCols);
}
|
| 153 | ||
| 154 |
// ***************************************************************************************************// |
|
| 155 | ||
| 156 | ||
| 157 |
// convert frequency vector into multiset |
|
| 158 |
Powers |
|
| 159 | ! |
freqvec2Powers(IntVector& vec, const int &vecLength) |
| 160 |
{
|
|
| 161 | ! |
Powers ret; |
| 162 | ! |
for (int power = 0; power != vecLength; power++) |
| 163 |
{
|
|
| 164 | ! |
for (int times = 0; times != vec[power]; times++) |
| 165 | ! |
ret.insert(power); |
| 166 |
} |
|
| 167 | ! |
return ret; |
| 168 |
} |
|
| 169 | ||
| 170 |
// ***************************************************************************************************// |
|
| 171 | ||
| 172 |
// End of file. |
| 1 |
/* |
|
| 2 |
* functionWraps.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 06.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#include "functionWraps.h" |
|
| 9 |
#include "rcppExport.h" |
|
| 10 | ||
| 11 |
using namespace Rcpp; |
|
| 12 | ||
| 13 |
// ***************************************************************************************************// |
|
| 14 | ||
| 15 |
// first R functions taking only a scalar |
|
| 16 | ||
| 17 |
//// constructor |
|
| 18 |
//RFunction::RFunction(SEXP R_fun) |
|
| 19 |
//{
|
|
| 20 |
// // allocate double scalar as function argument |
|
| 21 |
// Rf_protect(R_functionArgument = Rf_allocVector(REALSXP, 1)); |
|
| 22 |
// |
|
| 23 |
// // construct the function call |
|
| 24 |
// Rf_protect(R_functionCall = Rf_lcons(R_fun, |
|
| 25 |
// Rf_lcons(R_functionArgument, |
|
| 26 |
// R_NilValue))); |
|
| 27 |
// |
|
| 28 |
// // get the function frame (is connected to the R function, so is already protected) |
|
| 29 |
// R_functionFrame = FRAME(R_fun); |
|
| 30 |
//} |
|
| 31 | ||
| 32 |
//// copy constructor |
|
| 33 |
//RFunction::RFunction(const RFunction& old) |
|
| 34 |
//{
|
|
| 35 |
// // double scalar as function argument: |
|
| 36 |
// // take the same one as the old object. |
|
| 37 |
// Rf_protect(R_functionArgument = old.R_functionArgument); |
|
| 38 |
// |
|
| 39 |
// // so also for the function call we must take the same one |
|
| 40 |
// Rf_protect(R_functionCall = old.R_functionCall); |
|
| 41 |
// |
|
| 42 |
// // and then finally get the function frame |
|
| 43 |
// R_functionFrame = old.R_functionFrame; |
|
| 44 |
//} |
|
| 45 | ||
| 46 | ||
| 47 |
//// assignment operator |
|
| 48 |
//RFunction& RFunction::operator=(const RFunction& rhs) |
|
| 49 |
//{
|
|
| 50 |
// if(&rhs != this) |
|
| 51 |
// {
|
|
| 52 |
// // first unprotect the current members |
|
| 53 |
// Rf_unprotect(3); |
|
| 54 |
// |
|
| 55 |
// // then assign the new members: |
|
| 56 |
// |
|
| 57 |
// // double scalar as function argument: |
|
| 58 |
// // take the same one as the old object. |
|
| 59 |
// Rf_protect(R_functionArgument = rhs.R_functionArgument); |
|
| 60 |
// |
|
| 61 |
// // so also for the function call we must take the same one |
|
| 62 |
// Rf_protect(R_functionCall = rhs.R_functionCall); |
|
| 63 |
// |
|
| 64 |
// // and then finally get the function frame |
|
| 65 |
// R_functionFrame = rhs.R_functionFrame; |
|
| 66 |
// } |
|
| 67 |
// |
|
| 68 |
// // return ourself |
|
| 69 |
// return *this; |
|
| 70 |
//} |
|
| 71 |
// |
|
| 72 | ||
| 73 |
//// destructor |
|
| 74 |
//RFunction::~RFunction() |
|
| 75 |
//{
|
|
| 76 |
// // unprotect the the function argument and the call |
|
| 77 |
// Rf_unprotect(2); |
|
| 78 |
// |
|
| 79 |
// // everything else is automatically destroyed. |
|
| 80 |
//} |
|
| 81 | ||
| 82 |
// for use as a function |
|
| 83 |
double |
|
| 84 | 2694296x |
RFunction::operator()(double x) const |
| 85 |
{
|
|
| 86 |
// // input the argument |
|
| 87 |
// REAL(R_functionArgument)[0] = x; |
|
| 88 | ||
| 89 |
// // get the result |
|
| 90 |
// SEXP R_pars; |
|
| 91 |
// Rf_protect(R_pars = Rf_eval(R_functionCall, |
|
| 92 |
// R_functionFrame)); |
|
| 93 |
// |
|
| 94 |
// // now convert to double vector |
|
| 95 |
// double ret = Rf_asReal(Rf_coerceVector(R_pars, REALSXP)); |
|
| 96 |
// |
|
| 97 |
// // unprotect local R result |
|
| 98 |
// Rf_unprotect(1); |
|
| 99 |
// |
|
| 100 |
// // and return that |
|
| 101 |
// return ret; |
|
| 102 | ||
| 103 |
// shorter: |
|
| 104 |
// 8/11/2013: debug error |
|
| 105 |
// "'rho' must be an environment not pairlist: detected in C-level eval" |
|
| 106 |
// In gdb I find the following "classes" for the arguments: |
|
| 107 |
// (see Rinternals.h, line 98 ff.) |
|
| 108 | ||
| 109 | 2694296x |
double ret = as<double>(fun(x)); |
| 110 | 2694296x |
return ret; |
| 111 | ||
| 112 |
// return Rf_asReal(Rf_coerceVector(Rf_eval(R_functionCall, // SEXP type 6: language constructs |
|
| 113 |
// R_functionFrame), // SEXP type 2: lists of dotted pairs |
|
| 114 |
// REALSXP)); |
|
| 115 |
} |
|
| 116 | ||
| 117 |
// ***************************************************************************************************// |
|
| 118 | ||
| 119 | ||
| 120 |
// now R functions taking a fixed length vector |
|
| 121 | ||
| 122 | ||
| 123 |
//// constructor |
|
| 124 |
//VectorRFunction::VectorRFunction(SEXP R_function, R_len_t argsize) : |
|
| 125 |
// argsize(argsize) |
|
| 126 |
//{
|
|
| 127 |
// // allocate vector as function argument |
|
| 128 |
// Rf_protect(R_functionArgument = Rf_allocVector(REALSXP, argsize)); |
|
| 129 |
// |
|
| 130 |
// // construct the function call |
|
| 131 |
// Rf_protect(R_functionCall = Rf_lcons(R_function, |
|
| 132 |
// Rf_lcons(R_functionArgument, |
|
| 133 |
// R_NilValue))); |
|
| 134 |
// |
|
| 135 |
// // get the function frame (is connected to the R function, so is already protected) |
|
| 136 |
// R_functionFrame = FRAME(R_function); |
|
| 137 |
//} |
|
| 138 |
// |
|
| 139 |
// |
|
| 140 |
//// destructor |
|
| 141 |
//VectorRFunction::~VectorRFunction() |
|
| 142 |
//{
|
|
| 143 |
// // unprotect the function argument and call |
|
| 144 |
// Rf_unprotect(2); |
|
| 145 |
// |
|
| 146 |
// // everything else is automatically destroyed. |
|
| 147 |
//} |
|
| 148 | ||
| 149 |
// for use as a function |
|
| 150 |
double |
|
| 151 | ! |
VectorRFunction::operator()(const double* vector) const |
| 152 |
{
|
|
| 153 |
// copy the vector |
|
| 154 | ! |
NumericVector arg(argsize); |
| 155 | ! |
std::copy(vector, vector + argsize, arg.begin()); |
| 156 | ||
| 157 |
// and then evaluate the function call. |
|
| 158 | ! |
double ret = as<double>(fun(arg)); |
| 159 | ! |
return ret; |
| 160 |
} |
|
| 161 | ||
| 162 | ||
| 163 |
// ***************************************************************************************************// |
|
| 164 | ||
| 165 |
// save one pair of argument and function value.
// args and vals are kept as parallel vectors: args[i] maps to vals[i],
// so the two push_backs must stay together.
void
Cache::save(double arg, double val)
{
    args.push_back(arg);
    vals.push_back(val);
}
|
| 172 | ||
| 173 |
// clear the cache |
|
| 174 |
void |
|
| 175 | ! |
Cache::clear() |
| 176 |
{
|
|
| 177 | ! |
args.clear(); |
| 178 | ! |
vals.clear(); |
| 179 |
} |
|
| 180 | ||
| 181 |
// query for a function value |
|
| 182 |
double |
|
| 183 | 2958054x |
Cache::getValue(double arg) const |
| 184 |
{
|
|
| 185 |
// search for the argument |
|
| 186 | 2958054x |
MyDoubleVector::const_iterator iterVals = vals.begin(); |
| 187 | 2958054x |
for(MyDoubleVector::const_iterator |
| 188 | 2958054x |
iterArgs = args.begin(); |
| 189 | 171884273x |
iterArgs != args.end(); |
| 190 | 168926219x |
++iterArgs, ++iterVals) |
| 191 |
{
|
|
| 192 |
// match -> return corresponding value. |
|
| 193 | 169967611x |
if(*iterArgs == arg) |
| 194 |
{
|
|
| 195 | 1041392x |
return *iterVals; |
| 196 |
} |
|
| 197 |
} |
|
| 198 | ||
| 199 |
// if we have not found the argument, return NA |
|
| 200 | 1916662x |
return R_NaReal; |
| 201 |
} |
|
| 202 | ||
| 203 |
// initialize from an R list.
// Expects rcpp_list to carry numeric elements "args" and "vals"
// (the inverse of convert2list below).
Cache::Cache(List& rcpp_list) :
    args(as<MyDoubleVector>(rcpp_list["args"])),
    vals(as<MyDoubleVector>(rcpp_list["vals"]))
{
    // args and vals are parallel vectors, so unequal lengths mean the
    // incoming list is corrupt
    if(args.size() != vals.size())
    {
        Rf_error("Lengths of args and vals vectors in R list converted to Cache object not equal!");
    }
}
|
| 213 | ||
| 214 |
// convert to an Rcpp list |
|
| 215 |
List |
|
| 216 | 4719x |
Cache::convert2list() const |
| 217 |
{
|
|
| 218 | 9438x |
return List::create(_["args"] = getArgs(), |
| 219 | 14157x |
_["vals"] = getVals()); |
| 220 |
} |
|
| 221 | ||
| 222 | ||
| 223 |
// ***************************************************************************************************// |
|
| 224 | ||
| 225 |
// End of file. |
| 1 |
#include "rcppExport.h" |
|
| 2 |
#include "combinatorics.h" |
|
| 3 |
#include "dataStructure.h" |
|
| 4 |
#include "types.h" |
|
| 5 |
#include "zdensity.h" |
|
| 6 |
#include "bfgs.h" |
|
| 7 |
#include "optimize.h" |
|
| 8 |
#include "fpUcHandling.h" |
|
| 9 | ||
| 10 |
#ifdef _OPENMP |
|
| 11 |
#include <omp.h> |
|
| 12 |
#endif |
|
| 13 | ||
| 14 |
#include <math.h> |
|
| 15 |
#include <map> |
|
| 16 |
#include <vector> |
|
| 17 |
#include <algorithm> |
|
| 18 |
#include <iterator> |
|
| 19 |
#include <numeric> |
|
| 20 |
#include <iostream> |
|
| 21 |
#include <climits> |
|
| 22 |
#include <cmath> // for the long double overloading of exp and log |
|
| 23 |
#include <sstream> |
|
| 24 |
#include <string> |
|
| 25 |
#include <stdexcept> |
|
| 26 | ||
| 27 |
// using pretty much: |
|
| 28 |
using std::map; |
|
| 29 |
using std::set; |
|
| 30 |
using std::vector; |
|
| 31 |
using std::accumulate; |
|
| 32 |
using std::find; |
|
| 33 |
using std::set_difference; |
|
| 34 |
using std::count; |
|
| 35 |
using std::max_element; |
|
| 36 | ||
| 37 |
using namespace Rcpp; |
|
| 38 | ||
| 39 |
// ***************************************************************************************************// |
|
| 40 | ||
| 41 |
// compute varying part of log marginal likelihood for specific GLM / Cox model
// plus byproducts.
//
// Inputs:
//   mod, data, fpInfo, ucInfo, fixInfo, bookkeep, config — model and data info
//   gaussHermite — standard Gauss-Hermite nodes/weights supplied from R
// Outputs (by reference):
//   cache            — evaluated (z, value) pairs of the target function
//   zMode, zVar      — posterior mode and variance estimate of z = log(g)
//   laplaceApprox    — Laplace approximation to the log marginal likelihood
//   residualDeviance — residual deviance of the model (TBF only)
// Returns the log marginal likelihood approximation, or R_NaN if a
// std::domain_error occurred (model excluded).
double
getGlmVarLogMargLik(const ModelPar &mod,
                    const DataValues& data,
                    const FpInfo& fpInfo,
                    const UcInfo& ucInfo,
                    const FixInfo& fixInfo,
                    const Book& bookkeep,
                    const GlmModelConfig& config,
                    const GaussHermite& gaussHermite,
                    Cache& cache,
                    double& zMode,
                    double& zVar,
                    double& laplaceApprox,
                    double& residualDeviance)
{
    // echo detailed progress in debug mode
    if(bookkeep.debug)
    {
        Rprintf("\ngetGlmVarLogMargLik: Starting log marginal likelihood approximation for:\n%s", mod.print(fpInfo).c_str());
    }

    // check if any interrupt signals have been entered
    R_CheckUserInterrupt();

    // the return value will be placed in here:
    double ret = 0.0;

    // protect everything for problems in IWLS
    try
    {
        // get negative log unnormalized z density: a function object.
        NegLogUnnormZDens negLogUnnormZDens(mod,
                                            data,
                                            fpInfo,
                                            ucInfo,
                                            fixInfo,
                                            config,
                                            bookkeep);
        residualDeviance = negLogUnnormZDens.getResidualDeviance();

        // try to ask for analytic solutions in the TBF case
        // (they are available for the incomplete inverse gamma hyperprior and of course
        // for the fixed g case)
        if(bookkeep.tbf)
        {
            if(bookkeep.debug)
            {
                Rprintf("\ngetGlmVarLogMargLik: TBF used, ask for analytic solution ...");
            }

            if(bookkeep.useFixedg)
            {
                // conditional on the fixed g (z = log(g))
                zMode = log(config.fixedg);
                ret = - negLogUnnormZDens(zMode);

                if(bookkeep.debug)
                {
                    Rprintf("\ngetGlmVarLogMargLik: using fixed value g = %f", config.fixedg);
                }
            }
            else if(bookkeep.empiricalBayes)
            {
                // local empirical Bayes: maximize over z, mode goes to zMode
                ret = negLogUnnormZDens.getTBFMaxLogCondMargLik(zMode);

                if(bookkeep.debug)
                {
                    Rprintf("\ngetGlmVarLogMargLik: local empirical Bayes estimate of g is %f", exp(zMode));
                }
            }
            else
            {
                // full Bayes analytic TBF marginal likelihood (may be NA
                // if the hyperprior admits no closed form)
                ret = negLogUnnormZDens.getTBFLogMargLik();
            }

            // NA signals that no analytic solution was available;
            // only a non-NA result is returned here directly
            if(! R_IsNA(ret))
            {
                if(bookkeep.debug)
                {
                    Rprintf("\ngetGlmVarLogMargLik: analytic solution was found with result %f", ret);
                }
                return ret;
            }
            else
            {
                if(bookkeep.debug)
                {
                    Rprintf("\ngetGlmVarLogMargLik: no analytic solution possible for this hyperprior on g");
                }
            }

            // if no analytic solutions are available, we continue with the code below
        }

        // cache this function, because we do not want to evaluate it more often
        // than necessary.
        CachedFunction<NegLogUnnormZDens> cachedNegLogUnnormZDens(negLogUnnormZDens);

        if(bookkeep.useFixedg)
        {
            zMode = log(config.fixedg);

            // echo detailed progress in debug mode
            if(bookkeep.debug)
            {
                Rprintf("\ngetGlmVarLogMargLik: returning conditional marginal likelihood at fixed z = %f", zMode);
            }

            // return the log conditional marginal density log f(y | zfixed)
            ret = - cachedNegLogUnnormZDens(zMode);
        }
        else if(bookkeep.empiricalBayes)
        {
            // construct an appropriate object for using the optimize routine;
            // search bounds were tightened from (-100, 200) to (-20, 20)
            Brent<CachedFunction<NegLogUnnormZDens> > brent(cachedNegLogUnnormZDens,
                                                            -20,//-100.0, // log(DBL_MIN) + 40.0,
                                                            20,//+200.0, // log(DBL_MAX) - 40.0,
                                                            sqrt(EPS));
            // and get the mode from that.
            zMode = brent.minimize();

            // zVar and laplaceApprox are not touched, as they are not needed!

            // echo detailed progress in debug mode
            if(bookkeep.debug)
            {
                Rprintf("\ngetGlmVarLogMargLik: finished optimization of conditional marginal likelihood at mode %f", zMode);
            }

            // return the log conditional marginal density log f(y | z_mode)
            ret = - cachedNegLogUnnormZDens(zMode);
        }
        else // start full Bayes
        {
            // get function invHess to compute an accurate variance estimate
            AccurateNumericInvHessian<CachedFunction<NegLogUnnormZDens> > invHess(cachedNegLogUnnormZDens);

            // decide if bfgs, or optimize should be used
            if(bookkeep.useBfgs)
            {
                // constrain z to lie in the interval [log(DBL_MIN), log(DBL_MAX)], so
                // that g = exp(z) is always in [DBL_MIN, DBL_MAX].
                // 18/05: test a bit more realistic interval (+-200)
                // 08/07: even further shorten the interval to -100, 200 because the posterior
                // will be very flat below z=-100
                Bfgs<CachedFunction<NegLogUnnormZDens> > bfgs(cachedNegLogUnnormZDens,
                                                              bookkeep.debug,
                                                              -100.0, // log(DBL_MIN) + 40.0,
                                                              +200.0); //log(DBL_MAX) - 40.0);

                // now minimize the negative log density,
                // and put the resulting mode into zMode.
                int convergence = bfgs.minimize(0.0,
                                                zMode,
                                                zVar);

                // if we lost precision, do a second minimization starting from the previous mode
                // and estimate the variance afterwards separately (otherwise the variance estimate
                // would not be reliable!)
                if(convergence == -1)
                {
                    bfgs.minimize(zMode,
                                  zMode,
                                  zVar);
                    zVar = invHess(zMode);
                }
            }
            else // use optimize
            {
                // construct an appropriate object for using the optimize routine
                Brent<CachedFunction<NegLogUnnormZDens> > brent(cachedNegLogUnnormZDens,
                                                                -100.0, // log(DBL_MIN) + 40.0,
                                                                +200.0, // log(DBL_MAX) - 40.0,
                                                                sqrt(EPS));
                // and get the mode from that.
                zMode = brent.minimize();

                // here we have to compute the inverse hessian afterwards:
                // use the same epsilon here as for the minimization routine.
                zVar = invHess(zMode);
            }

            // be careful that the result for zVar is not totally wrong.
            // if the variance estimate is very large, warn the user.
            // NOTE(review): message says "by BFGS" even on the Brent path.
            if(zVar > bookkeep.largeVariance)
            {
                Rf_warning("\nLarge variance estimate (%f > %f) for z marginal encountered by BFGS",
                           zVar, bookkeep.largeVariance);
            }

            // since we minimize here the negative log density, the resulting inverse
            // hessian is the estimated variance of the approximating Gaussian.

            // be sure that we get a positive variance estimate
            if(zVar <= 0.0)
            {
                // warn
                Rf_warning("\nNon-positive variance estimate %f encountered!\nProbably the optimization did not converge.\nResetting to default variance",
                           zVar);

                // set large default to explore some space
                zVar = 15.0;
            }

            // echo detailed progress in debug mode
            if(bookkeep.debug)
            {
                Rprintf("\ngetGlmVarLogMargLik: finished minimization at mode %f with variance %f", zMode, zVar);
            }

            // compute the Laplace approximation to the log marginal likelihood from the mode and variance,
            // so this does not require evaluations inside the Gauss-Hermite quadrature.
            laplaceApprox = M_LN_SQRT_2PI + 0.5 * log(zVar) - cachedNegLogUnnormZDens(zMode);

            // then compute the Gauss-Hermite quadrature, using the supplied standard nodes and
            // weights from R
            MyDoubleVector nodes;
            MyDoubleVector logWeights;

            // get the nodes and log weights for this mode and variance:
            gaussHermite.getNodesAndLogWeights(zMode, zVar, nodes, logWeights);

            // the log contributions which will be stored here:
            SafeSum logContributions;

            // compute them now; nodes and logWeights are parallel vectors
            MyDoubleVector::const_iterator n = nodes.begin();
            for(MyDoubleVector::const_iterator
                    w = logWeights.begin();
                    w != logWeights.end();
                    ++w, ++n)
            {
                logContributions.add((*w) - cachedNegLogUnnormZDens(*n));
            }

            // the result is the log of the sum of the exp'ed values
            ret = logContributions.logSumExp();
        } // end full Bayes

        // echo detailed progress in debug mode
        if(bookkeep.debug)
        {
            Rprintf("\ngetGlmVarLogMargLik: finished log marginal likelihood approximation.");
        }

        // also give back the cache
        cache = cachedNegLogUnnormZDens.getCache();

        // check finiteness
        if (! R_finite(ret))
        {
            std::ostringstream stream;
            stream << "getGlmVarLogMargLik got non-finite log marginal likelihood approximation " << ret;
            throw std::domain_error(stream.str().c_str());
        }
    }
    catch (std::domain_error& error)
    {
        // any domain_error (from IWLS or the finiteness check above)
        // excludes the model instead of aborting the whole run
        if(bookkeep.debug)
        {
            Rprintf("\ngetGlmVarLogMargLik: Model can not be included, because\n%s", error.what());
        }

        return R_NaN;
    }

    // only then return the estimate
    return ret;
}
|
| 321 | ||
| 322 |
// ***************************************************************************************************// |
|
| 323 | ||
| 324 |
// compute varying part of logarithm of model prior |
|
| 325 |
double |
|
| 326 | 15820x |
getVarLogPrior( |
| 327 |
const ModelPar &mod, |
|
| 328 |
const FpInfo &fpInfo, |
|
| 329 |
const UcInfo &ucInfo, |
|
| 330 |
const FixInfo& fixInfo, |
|
| 331 |
const Book &bookkeep) |
|
| 332 |
{
|
|
| 333 | 15820x |
if (bookkeep.modelPrior == "sparse") |
| 334 |
{
|
|
| 335 | 1320x |
SafeSum thisVarLogPrior; |
| 336 | 10560x |
for (unsigned int i = 0; i != fpInfo.nFps; i++) |
| 337 |
{ // for each fp covariate
|
|
| 338 | 9240x |
unsigned int degree = mod.fpPars.at(i).size(); |
| 339 | 9240x |
double thisVal = -Rf_lchoose(fpInfo.fpcards[i] - 1 + degree, degree) |
| 340 | 9240x |
- log1p(fpInfo.fpmaxs[i]); |
| 341 | 9240x |
thisVarLogPrior.add(thisVal); |
| 342 |
} |
|
| 343 | 1320x |
return thisVarLogPrior.sum() - ((ucInfo.nUcGroups + fixInfo.nFixGroups) * M_LN2); //TODO Is this correct? |
| 344 |
} |
|
| 345 | 14500x |
else if (bookkeep.modelPrior == "dependent") |
| 346 |
{
|
|
| 347 |
// determine number of all covariates (covariate groups): |
|
| 348 | 14500x |
int nCovs = ucInfo.nUcGroups + fixInfo.nFixGroups + fpInfo.nFps; //TODO Is this correct? |
| 349 | ||
| 350 |
// determine number of included FPs and which are nonlinear: |
|
| 351 | 14500x |
int nInclContinuous = 0; |
| 352 | 14500x |
PosIntVector nonlinearFps; |
| 353 | ||
| 354 | 52660x |
for(PosInt i = 0; i != fpInfo.nFps; i++) |
| 355 |
{
|
|
| 356 | 38160x |
Powers powersi = mod.fpPars.at(i); |
| 357 | 38160x |
if (! powersi.empty()) |
| 358 |
{
|
|
| 359 | 25360x |
++nInclContinuous; |
| 360 | ||
| 361 | 25360x |
if(mod.fpPars.at(i) != fpInfo.linearPowers) |
| 362 |
{
|
|
| 363 | 17200x |
nonlinearFps.push_back(i); |
| 364 |
} |
|
| 365 |
} |
|
| 366 |
} |
|
| 367 | ||
| 368 |
// determine number of included discrete covariates: |
|
| 369 | 14500x |
int nInclDiscrete = mod.ucPars.size() + fixInfo.nFixGroups; |
| 370 | ||
| 371 |
// so altogether there are |
|
| 372 | 14500x |
int nIncluded = nInclContinuous + nInclDiscrete; |
| 373 |
// included covariates |
|
| 374 | ||
| 375 |
// and the number of possible nonlinear transformations |
|
| 376 |
// for each variable is also included in the computations: |
|
| 377 | 14500x |
double sumLogNonlinearPossibilities = 0.0; |
| 378 | 14500x |
for(PosIntVector::const_iterator |
| 379 | 14500x |
i = nonlinearFps.begin(); |
| 380 | 31700x |
i != nonlinearFps.end(); |
| 381 | 17200x |
++i) |
| 382 |
{
|
|
| 383 | 17200x |
sumLogNonlinearPossibilities += log(fpInfo.numberPossibleFps.at(*i) - 2.0); |
| 384 |
// Note: degree 0 and linear degree 1 FP are subtracted |
|
| 385 |
} |
|
| 386 | ||
| 387 | 14500x |
double result = - log1p(nCovs) - Rf_lchoose(nCovs, nIncluded) - |
| 388 | 14500x |
log1p(nInclContinuous) - Rf_lchoose(nInclContinuous, nonlinearFps.size()) - |
| 389 | 14500x |
sumLogNonlinearPossibilities; |
| 390 | ||
| 391 | 14500x |
return result; |
| 392 |
} |
|
| 393 |
else |
|
| 394 |
{
|
|
| 395 | ! |
return - (ucInfo.nUcGroups * M_LN2); |
| 396 |
} |
|
| 397 |
} |
|
| 398 | ||
| 399 |
// ***************************************************************************************************// |
|
| 400 | ||
| 401 |
// 21/11/2012: modify for tbf methodology |
|
| 402 | ||
| 403 |
// compute (varying part of) marginal likelihood and prior of model and insert it into model set |
|
| 404 |
void |
|
| 405 | 640x |
computeGlm(const ModelPar &mod, |
| 406 |
set<Model> &space, |
|
| 407 |
const DataValues& data, |
|
| 408 |
const FpInfo& fpInfo, |
|
| 409 |
const UcInfo& ucInfo, |
|
| 410 |
const FixInfo& fixInfo, |
|
| 411 |
Book& bookkeep, |
|
| 412 |
const GlmModelConfig& config, |
|
| 413 |
const GaussHermite& gaussHermite) |
|
| 414 |
{
|
|
| 415 |
// log prior |
|
| 416 | 640x |
const double thisLogPrior = getVarLogPrior(mod, |
| 417 |
fpInfo, |
|
| 418 |
ucInfo, |
|
| 419 |
fixInfo, |
|
| 420 |
bookkeep); |
|
| 421 | ||
| 422 |
// initialize variables which will definitely be overwritten below |
|
| 423 | 640x |
double thisVarLogMargLik = 0.0; |
| 424 | ||
| 425 |
// this will not be available when this is the null model! then it is *not* overwritten below. |
|
| 426 | 640x |
double zMode = R_NaReal; |
| 427 | 640x |
double zVar = R_NaReal; |
| 428 | 640x |
double laplaceApprox = R_NaReal; |
| 429 | 640x |
double residualDeviance = R_NaReal; |
| 430 | 640x |
Cache cache; |
| 431 | ||
| 432 |
// compute log marginal likelihood, and also as byproducts unnormalized z density information. |
|
| 433 | ||
| 434 |
// be careful: if this is the null model, then just input the data computed in R. |
|
| 435 | 640x |
if(mod.size(ucInfo, fixInfo) == 0) |
| 436 |
{
|
|
| 437 | 10x |
thisVarLogMargLik = config.nullModelLogMargLik; |
| 438 |
} |
|
| 439 |
else // not the null model, so at least one other coefficient than the intercept present in the model |
|
| 440 |
{
|
|
| 441 | 630x |
thisVarLogMargLik = getGlmVarLogMargLik(mod, data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite, |
| 442 |
cache, zMode, zVar, laplaceApprox, residualDeviance); |
|
| 443 |
} |
|
| 444 | ||
| 445 |
// if we get back NaN |
|
| 446 | 640x |
if (R_IsNaN(thisVarLogMargLik) == TRUE) |
| 447 |
{
|
|
| 448 |
// increment counter of bad models |
|
| 449 | ! |
bookkeep.nanCounter++; |
| 450 |
// we do not save this here, because for the sampling mode we will have a different function! |
|
| 451 |
} |
|
| 452 |
else |
|
| 453 |
{
|
|
| 454 |
// put all into the modelInfo |
|
| 455 | 640x |
GlmModelInfo info(thisVarLogMargLik, thisLogPrior, cache, zMode, zVar, laplaceApprox, residualDeviance); |
| 456 | ||
| 457 |
// altogether we have the model: |
|
| 458 | 640x |
Model thisModel(mod, info); |
| 459 | ||
| 460 |
// and insert it into the model space |
|
| 461 | 640x |
if (space.size() >= bookkeep.nModels) |
| 462 |
{
|
|
| 463 | ! |
set<Model>::iterator it = space.begin(); |
| 464 | ! |
if (* it < thisModel) |
| 465 |
{ // compare this model to the least probable model in the set
|
|
| 466 | ! |
space.erase(it); |
| 467 | ! |
space.insert(thisModel); // exchange if it is better than this worst model in the set |
| 468 |
} |
|
| 469 |
} |
|
| 470 |
else |
|
| 471 |
{
|
|
| 472 | 640x |
space.insert(thisModel); |
| 473 |
} |
|
| 474 | ||
| 475 |
// compute log posterior probability (up to an additive constant) and |
|
| 476 |
// append it to the safe sum object |
|
| 477 | 640x |
bookkeep.modelLogPosteriors.add(thisVarLogMargLik + thisLogPrior); |
| 478 | ||
| 479 |
// update inclusion probabilities for covariate (groups) |
|
| 480 | 640x |
mod.pushInclusionProbs(fpInfo, ucInfo, bookkeep); //TODO |
| 481 | ||
| 482 |
// increment distinct models counter |
|
| 483 | 640x |
bookkeep.modelCounter++; |
| 484 |
} |
|
| 485 | ||
| 486 |
// display computation progress at each percent: |
|
| 487 | 1920x |
if (((bookkeep.modelCounter + bookkeep.nanCounter) % |
| 488 | 1920x |
std::max(data.totalNumber / 100, |
| 489 | 640x |
static_cast<PosLargeInt> (1)) |
| 490 |
== 0) |
|
| 491 |
&& bookkeep.verbose) // but only print if verbose option was chosen |
|
| 492 |
{
|
|
| 493 | ! |
Rprintf("-");
|
| 494 |
} |
|
| 495 |
} |
|
| 496 | ||
| 497 | ||
| 498 |
// ***************************************************************************************************// |
|
| 499 | ||
| 500 | ||
| 501 |
//TODO FINISH THIS FUNCTION |
|
| 502 | ||
| 503 |
// compute only the models in the list R-list "R_modelConfigs" |
|
| 504 |
List |
|
| 505 | ! |
glmModelsInList(const DataValues& data, |
| 506 |
const FpInfo& fpInfo, |
|
| 507 |
const UcInfo& ucInfo, |
|
| 508 |
const FixInfo& fixInfo, |
|
| 509 |
Book& bookkeep, |
|
| 510 |
const GlmModelConfig& config, |
|
| 511 |
const GaussHermite& gaussHermite, |
|
| 512 |
const List& rcpp_modelConfigs) |
|
| 513 |
{
|
|
| 514 |
// ------------ |
|
| 515 |
// bookkeeping: |
|
| 516 | ||
| 517 |
// for computation of inclusion probs: |
|
| 518 |
// array of IndexSafeSum objects. |
|
| 519 |
// const int cgwp_length = fpInfo.nFps + ucInfo.nUcGroups; |
|
| 520 |
// //IndexSafeSum cgwp[fpInfo.nFps + ucInfo.nUcGroups]; |
|
| 521 |
// IndexSafeSum cgwp[cgwp_length]; |
|
| 522 |
// bookkeep.covGroupWisePosteriors = cgwp; |
|
| 523 | ||
| 524 | ! |
std::vector<IndexSafeSum> cgwp(fpInfo.nFps + ucInfo.nUcGroups); |
| 525 | ! |
bookkeep.covGroupWisePosteriors = cgwp; |
| 526 |
|
|
| 527 |
|
|
| 528 |
|
|
| 529 |
// start the set of ordered models: |
|
| 530 | ! |
set<Model> orderedModels; |
| 531 | ||
| 532 |
// check that the set is large enough for all models in the list |
|
| 533 | ! |
if (orderedModels.max_size() < static_cast<set<Model>::size_type >(rcpp_modelConfigs.size())) |
| 534 | ! |
Rf_error("\nlist of model space is too large - cannot compute every model\n");
|
| 535 | ||
| 536 |
// ------------ |
|
| 537 |
// process all models in the list: |
|
| 538 | ||
| 539 | ! |
for(R_len_t i = 0; i < rcpp_modelConfigs.size(); ++i) |
| 540 |
{
|
|
| 541 | ! |
as<List>(rcpp_modelConfigs[i]); |
| 542 |
|
|
| 543 |
// this is the current model config: |
|
| 544 | ! |
ModelPar modelConfig(as<List>(rcpp_modelConfigs[i]), |
| 545 | ! |
fpInfo); |
| 546 | ||
| 547 |
// compute this one |
|
| 548 | ! |
computeGlm(modelConfig, orderedModels, |
| 549 |
data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite); |
|
| 550 | ||
| 551 |
} |
|
| 552 | ||
| 553 |
// ------------ |
|
| 554 |
// we have finished. |
|
| 555 | ||
| 556 |
// now echo statistics: |
|
| 557 | ! |
if (bookkeep.verbose) |
| 558 |
{
|
|
| 559 | ! |
Rprintf("\nActual number of possible models: %lu",
|
| 560 |
bookkeep.modelCounter); |
|
| 561 | ! |
Rprintf("\nNumber of non-identifiable models: %lu",
|
| 562 |
bookkeep.nanCounter); |
|
| 563 | ! |
Rprintf("\nNumber of saved possible models: %zu\n",
|
| 564 |
orderedModels.size()); |
|
| 565 |
} |
|
| 566 | ||
| 567 |
// ------------ |
|
| 568 |
// allocate the return list |
|
| 569 | ! |
List ret(orderedModels.size()); |
| 570 | ||
| 571 |
// and fill it: |
|
| 572 | ||
| 573 |
// normalize posterior probabilities and correct log prior of the models to return |
|
| 574 |
// (we do not know here the normalizing constant for the marginal likelihoods!) |
|
| 575 | ! |
const long double logNormConst = bookkeep.modelLogPosteriors.logSumExp(); |
| 576 | ||
| 577 |
// first the single models |
|
| 578 | ! |
PosInt i = 0; |
| 579 | ! |
for (set<Model>::const_reverse_iterator |
| 580 | ! |
j = orderedModels.rbegin(); |
| 581 | ! |
j != orderedModels.rend(); |
| 582 |
j++) |
|
| 583 |
{
|
|
| 584 | ! |
ret[i++] = j->convert2list(fpInfo, |
| 585 |
logNormConst, |
|
| 586 | ! |
bookkeep); |
| 587 |
} |
|
| 588 | ||
| 589 |
// then some attributes: |
|
| 590 | ||
| 591 | ! |
NumericVector inc(fpInfo.nFps + ucInfo.nUcGroups); |
| 592 | ! |
for (R_len_t i = 0; i != inc.size(); ++i) |
| 593 |
{
|
|
| 594 | ! |
inc[i] = bookkeep.covGroupWisePosteriors[i].sumNormalizedExp(bookkeep.modelLogPosteriors, logNormConst); |
| 595 |
} |
|
| 596 | ! |
ret.attr("inclusionProbs") = inc;
|
| 597 | ! |
ret.attr("numVisited") = static_cast<double>(bookkeep.modelCounter);
|
| 598 | ! |
ret.attr("logNormConst") = static_cast<double>(logNormConst);
|
| 599 | ||
| 600 |
// ------------ |
|
| 601 |
// finally return the list. |
|
| 602 | ! |
return ret; |
| 603 |
} |
|
| 604 | ||
| 605 | ||
| 606 |
// ***************************************************************************************************// |
|
| 607 | ||
| 608 |
// recursion via: |
|
| 609 |
void |
|
| 610 | 10x |
glmPermPars(PosInt pos, // current position in parameter vector, starting from 0 - copied. |
| 611 |
ModelPar mod, // is copied every time! everything else is call by reference. |
|
| 612 |
set<Model>& space, // the model space |
|
| 613 |
const DataValues& data, |
|
| 614 |
const FpInfo& fpInfo, |
|
| 615 |
const UcInfo& ucInfo, |
|
| 616 |
const FixInfo& fixInfo, |
|
| 617 |
Book& bookkeep, |
|
| 618 |
const GlmModelConfig& config, |
|
| 619 |
const GaussHermite& gaussHermite) |
|
| 620 | ||
| 621 |
{
|
|
| 622 |
// if some fps are still left |
|
| 623 | 10x |
if (pos != fpInfo.nFps) |
| 624 |
{
|
|
| 625 |
// cardinality of the power set at this position |
|
| 626 | ! |
const PosInt card = fpInfo.fpcards.at(pos); |
| 627 | ||
| 628 |
// degree 0: |
|
| 629 | ! |
glmPermPars(pos + 1, mod, space, |
| 630 |
data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite); |
|
| 631 | ||
| 632 |
// different degrees for fp at pos: |
|
| 633 |
// degrees 1, ..., fpmax |
|
| 634 | ! |
for (PosInt deg = 1; deg <= fpInfo.fpmaxs.at(pos); deg++) |
| 635 |
{
|
|
| 636 |
// increment sums of fp degrees |
|
| 637 | ! |
mod.fpSize++; |
| 638 | ||
| 639 |
// partition of deg into card parts |
|
| 640 | ! |
IntVector part (card); |
| 641 | ||
| 642 |
// internal variables for comp_next |
|
| 643 | ! |
bool more1 = false; |
| 644 | ! |
int h(0), t(0); |
| 645 | ||
| 646 | ! |
do |
| 647 |
{
|
|
| 648 |
// next partition of deg into card parts |
|
| 649 | ! |
comp_next(deg, card, part, &more1, h, t); |
| 650 | ||
| 651 |
// convert into multiset |
|
| 652 | ! |
mod.fpPars.at(pos) = freqvec2Powers(part, card); |
| 653 | ||
| 654 |
// and go on |
|
| 655 | ! |
glmPermPars(pos + 1, mod, space, |
| 656 |
data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite); |
|
| 657 |
} |
|
| 658 |
while (more1); |
|
| 659 |
} |
|
| 660 |
} |
|
| 661 |
else // no fps left (all FPs have received their powers) |
|
| 662 |
{
|
|
| 663 |
// no uc group |
|
| 664 | 10x |
computeGlm(mod, space, |
| 665 |
data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite); //TODO IS THIS THE NULL MODEL? IF SO ADD NULL+FIXED NEXT |
|
| 666 | ||
| 667 |
// different positive number (deg) of uc groups |
|
| 668 | 70x |
for (PosInt deg = 1; deg <= ucInfo.nUcGroups; deg++) |
| 669 |
{
|
|
| 670 |
// partition of deg into card parts |
|
| 671 | 60x |
IntVector subset (deg); |
| 672 | ||
| 673 |
// internal variables for ksub_next |
|
| 674 | 60x |
bool more2 = false; |
| 675 | 60x |
int m(0), m2(0); |
| 676 | ||
| 677 | 630x |
do |
| 678 |
{
|
|
| 679 |
// next subset (positive integers) |
|
| 680 | 630x |
ksub_next(ucInfo.nUcGroups, deg, subset, &more2, m, m2); |
| 681 | ||
| 682 |
// convert into set |
|
| 683 | 630x |
mod.ucPars = IntSet(subset.data(), subset.data() + deg); |
| 684 | ||
| 685 |
// and compute this model |
|
| 686 | 630x |
computeGlm(mod, space, |
| 687 |
data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite); |
|
| 688 |
} |
|
| 689 |
while (more2); |
|
| 690 |
} |
|
| 691 |
} |
|
| 692 |
} |
|
| 693 | ||
| 694 |
// ***************************************************************************************************// |
|
| 695 | ||
| 696 |
List |
|
| 697 | 140x |
glmSampling(const DataValues& data, |
| 698 |
const FpInfo& fpInfo, |
|
| 699 |
const UcInfo& ucInfo, |
|
| 700 |
const FixInfo& fixInfo, |
|
| 701 |
Book& bookkeep, |
|
| 702 |
const GlmModelConfig& config, |
|
| 703 |
const GaussHermite& gaussHermite) |
|
| 704 |
{
|
|
| 705 |
// models which can be found during chain run can be cached in here: |
|
| 706 | 140x |
ModelCache modelCache(bookkeep.nCache); |
| 707 | ||
| 708 |
// upper limit for num of columns: min(n, maximum fixed + fp + uc columns). |
|
| 709 | 140x |
PosInt maxDim = std::min(static_cast<PosInt>(data.nObs), 1 + fpInfo.maxFpDim + ucInfo.maxUcDim); |
| 710 | ||
| 711 |
// the FP range |
|
| 712 | 140x |
const PosIntSet fpRange = constructSequence(fpInfo.nFps); |
| 713 | ||
| 714 |
// start model is the null model! |
|
| 715 |
ModelMcmc old(fpInfo, |
|
| 716 |
ucInfo, |
|
| 717 |
maxDim, |
|
| 718 | 140x |
config.nullModelLogMargLik); |
| 719 | ||
| 720 |
// insert this model into the cache |
|
| 721 | 140x |
double logPrior = getVarLogPrior(old.modPar, |
| 722 |
fpInfo, |
|
| 723 |
ucInfo, |
|
| 724 |
fixInfo, |
|
| 725 |
bookkeep); |
|
| 726 | 140x |
old.logPrior = logPrior; |
| 727 | ||
| 728 |
// put all into the modelInfo |
|
| 729 | 140x |
GlmModelInfo startInfo(old.logMargLik, logPrior, Cache(), R_NaReal, R_NaReal, R_NaReal, 0.0); |
| 730 | ||
| 731 | 140x |
modelCache.insert(old.modPar, startInfo); |
| 732 | ||
| 733 |
// start with this model config |
|
| 734 | 140x |
ModelMcmc now(old); |
| 735 | ||
| 736 | 140x |
if(fixInfo.nFixGroups > 0){
|
| 737 |
// move to the null model + fixed covariates **********************************************// |
|
| 738 |
|
|
| 739 |
// add the fixed covariates to the model configuration |
|
| 740 | ! |
IntSet s; |
| 741 | ! |
for (unsigned int i = 0; i < fixInfo.nFixGroups; ++i) |
| 742 | ! |
s.insert(s.end(), i+1); |
| 743 | ! |
now.modPar.fixPars = s; |
| 744 |
|
|
| 745 |
//get log prior for null+fixed model |
|
| 746 | ! |
double logPrior2 = getVarLogPrior(now.modPar, |
| 747 |
fpInfo, |
|
| 748 |
ucInfo, |
|
| 749 |
fixInfo, |
|
| 750 |
bookkeep); |
|
| 751 |
|
|
| 752 |
// and marginal log like |
|
| 753 | ! |
double zMode = 0.0; |
| 754 | ! |
double zVar = 0.0; |
| 755 | ! |
double laplaceApprox = 0.0; |
| 756 | ! |
double residualDeviance = R_NaReal; |
| 757 | ! |
Cache cache; |
| 758 |
|
|
| 759 | ! |
now.logMargLik = getGlmVarLogMargLik(now.modPar, |
| 760 |
data, |
|
| 761 |
fpInfo, |
|
| 762 |
ucInfo, |
|
| 763 |
fixInfo, |
|
| 764 |
bookkeep, |
|
| 765 |
config, |
|
| 766 |
gaussHermite, |
|
| 767 |
cache, |
|
| 768 |
zMode, |
|
| 769 |
zVar, |
|
| 770 |
laplaceApprox, |
|
| 771 |
residualDeviance); |
|
| 772 |
|
|
| 773 |
// put all into the modelInfo |
|
| 774 | ! |
GlmModelInfo start2Info(now.logMargLik, logPrior2, Cache(), R_NaReal, R_NaReal, R_NaReal, 0.0); |
| 775 |
|
|
| 776 | ! |
modelCache.insert(now.modPar, start2Info); |
| 777 |
|
|
| 778 |
// start with this model config |
|
| 779 | ! |
ModelMcmc now2(now); |
| 780 |
|
|
| 781 | ! |
now = now2; |
| 782 |
} |
|
| 783 |
|
|
| 784 |
// Start MCMC sampler***********************************************************// |
|
| 785 | ||
| 786 | 140x |
GetRNGstate(); // use R's random number generator |
| 787 | ||
| 788 | 501740x |
for(PosLargeInt t = 0; t != bookkeep.chainlength; /* ++t explicitly at the end */) |
| 789 |
{
|
|
| 790 |
double logPropRatio; // log proposal ratio |
|
| 791 | ||
| 792 |
// randomly select move type |
|
| 793 | 501600x |
double u1 = unif_rand(); |
| 794 | ||
| 795 | 501600x |
if (u1 < old.birthprob) |
| 796 |
{ // BIRTH
|
|
| 797 | 148510x |
PosInt newCovInd = *discreteUniform<PosIntSet>(old.freeCovs); |
| 798 | ||
| 799 | 148510x |
if (newCovInd <= fpInfo.nFps) |
| 800 |
{ // some fp index
|
|
| 801 | 24580x |
Int powerIndex = discreteUniform<Int>(0, fpInfo.fpcards[newCovInd-1]); |
| 802 | 24580x |
now.modPar.fpPars.at(newCovInd-1).insert(powerIndex); |
| 803 | 24580x |
now.modPar.fpSize++; // correct invariants |
| 804 | 24580x |
now.dim++; |
| 805 | 24580x |
PosInt newPowersEqualPowerIndex = count(now.modPar.fpPars.at(newCovInd-1).begin(), now.modPar.fpPars.at(newCovInd-1).end(), powerIndex); |
| 806 | 24580x |
PosInt m = old.modPar.fpPars.at(newCovInd-1).size(); |
| 807 | ||
| 808 | 24580x |
logPropRatio = log(double(newPowersEqualPowerIndex)) + log(double(fpInfo.fpcards[newCovInd-1])) - log1p(m); |
| 809 |
} |
|
| 810 |
else |
|
| 811 |
{ // uc index
|
|
| 812 | 123930x |
Int index = *discreteUniform(old.freeUcs); |
| 813 | 123930x |
now.modPar.ucPars.insert(index); |
| 814 | 123930x |
now.dim += ucInfo.ucSizes.at(index - 1); |
| 815 | 123930x |
now.freeUcs = now.modPar.getFreeUcs(ucInfo.ucSizes, now.dim, maxDim); |
| 816 | ||
| 817 | 123930x |
logPropRatio = log(double(old.freeUcs.size())) - log(double(now.modPar.ucPars.size())); |
| 818 |
} |
|
| 819 | ||
| 820 | 148510x |
now.presentCovs.insert(newCovInd); |
| 821 | 148510x |
now.freeCovs = now.modPar.getFreeCovs(fpInfo, now.freeUcs, now.dim, maxDim); |
| 822 | ||
| 823 | 148510x |
if (now.dim == maxDim) |
| 824 |
{
|
|
| 825 | 9190x |
now.birthprob = 0; now.deathprob = now.moveprob = (now.modPar.fpSize > 0) ? 1.0 / 3 : 0.5; |
| 826 |
} else |
|
| 827 |
{
|
|
| 828 | 139320x |
now.birthprob = now.deathprob = now.moveprob = (now.modPar.fpSize > 0) ? 0.25 : 1.0 / 3; |
| 829 |
} |
|
| 830 | ||
| 831 | 148510x |
logPropRatio += log(now.deathprob) - log(old.birthprob) + log(double(old.freeCovs.size())) - log(double(now.presentCovs.size())); |
| 832 | ||
| 833 |
} |
|
| 834 | 353090x |
else if |
| 835 | 353090x |
(u1 < old.birthprob + old.deathprob) |
| 836 |
{ // DEATH
|
|
| 837 | 165070x |
PosInt oldCovInd = *discreteUniform(old.presentCovs); |
| 838 | ||
| 839 | 165070x |
if (oldCovInd <= fpInfo.nFps) |
| 840 |
{ // some fp index
|
|
| 841 | 24720x |
Powers::iterator powerIterator = discreteUniform(now.modPar.fpPars.at(oldCovInd-1)); |
| 842 | 24720x |
PosInt oldPowersEqualPowerIndex = count(old.modPar.fpPars.at(oldCovInd-1).begin(), old.modPar.fpPars.at(oldCovInd-1).end(), *powerIterator); |
| 843 | 24720x |
now.modPar.fpPars.at(oldCovInd-1).erase(powerIterator); |
| 844 | 24720x |
now.modPar.fpSize--; // correct invariants |
| 845 | 24720x |
now.dim--; |
| 846 | ||
| 847 | 24720x |
logPropRatio = - log(double(oldPowersEqualPowerIndex)) - log(double(fpInfo.fpcards[oldCovInd-1])) + log(double(old.modPar.fpPars.at(oldCovInd-1).size())); |
| 848 | ||
| 849 |
} else { // uc index
|
|
| 850 | 140350x |
IntSet::iterator IndIterator = discreteUniform(now.modPar.ucPars); |
| 851 |
// now.modPar.ucSize--; |
|
| 852 | 140350x |
now.dim -= ucInfo.ucSizes.at(*IndIterator - 1); |
| 853 | 140350x |
now.modPar.ucPars.erase(IndIterator); |
| 854 | 140350x |
now.freeUcs = now.modPar.getFreeUcs(ucInfo.ucSizes, now.dim, maxDim); |
| 855 | 140350x |
logPropRatio = log(double(old.modPar.ucPars.size())) - log(double(now.freeUcs.size())); |
| 856 |
} |
|
| 857 | 165070x |
now.presentCovs = now.modPar.getPresentCovs(); |
| 858 | 165070x |
now.freeCovs = now.modPar.getFreeCovs(fpInfo, now.freeUcs, now.dim, maxDim); |
| 859 | 165070x |
if (now.dim == 1) |
| 860 |
{
|
|
| 861 | 55400x |
now.birthprob = 1; now.deathprob = now.moveprob = 0; |
| 862 |
} else |
|
| 863 |
{
|
|
| 864 | 109670x |
now.birthprob = now.deathprob = now.moveprob = (now.modPar.fpSize > 0) ? 0.25 : 1.0 / 3.0; |
| 865 |
} |
|
| 866 | 165070x |
logPropRatio += log(now.birthprob) - log(old.deathprob) + log(double(old.presentCovs.size())) - log(double(now.freeCovs.size())); |
| 867 | ||
| 868 |
} |
|
| 869 | 188020x |
else if (u1 < old.birthprob + old.deathprob + old.moveprob) |
| 870 |
{ // MOVE
|
|
| 871 | 162120x |
PosInt CovInd = *discreteUniform<PosIntSet>(old.presentCovs); |
| 872 | ||
| 873 | 162120x |
if (CovInd <= fpInfo.nFps) |
| 874 |
{ // some fp index
|
|
| 875 | 26400x |
Powers::iterator powerIterator = discreteUniform(now.modPar.fpPars.at(CovInd-1)); |
| 876 | 26400x |
PosInt oldPowersEqualPowerIndex = count(old.modPar.fpPars.at(CovInd-1).begin(), old.modPar.fpPars.at(CovInd-1).end(), *powerIterator); |
| 877 | 26400x |
now.modPar.fpPars.at(CovInd-1).erase(powerIterator); |
| 878 | 26400x |
Int powerIndex = discreteUniform<Int>(0, fpInfo.fpcards[CovInd-1]); |
| 879 | 26400x |
now.modPar.fpPars.at(CovInd-1).insert(powerIndex); |
| 880 | 26400x |
PosInt newPowersEqualPowerIndex = count(now.modPar.fpPars.at(CovInd-1).begin(), now.modPar.fpPars.at(CovInd-1).end(), powerIndex); |
| 881 |
// free, present Covs and move type probs are unchanged |
|
| 882 | 26400x |
logPropRatio = log(double(newPowersEqualPowerIndex)) - log(double(oldPowersEqualPowerIndex)); |
| 883 |
} |
|
| 884 |
else |
|
| 885 |
{ // uc index
|
|
| 886 | 135720x |
IntSet::iterator IndIterator = discreteUniform(now.modPar.ucPars); |
| 887 | 135720x |
now.dim -= ucInfo.ucSizes.at(*IndIterator - 1); |
| 888 | 135720x |
now.modPar.ucPars.erase(IndIterator); |
| 889 | 135720x |
now.freeUcs = now.modPar.getFreeUcs(ucInfo.ucSizes, now.dim, maxDim); |
| 890 | 135720x |
Int index = *discreteUniform<IntSet>(now.freeUcs); |
| 891 | 135720x |
now.modPar.ucPars.insert(index); |
| 892 | 135720x |
now.dim += ucInfo.ucSizes.at(index - 1); |
| 893 | 135720x |
now.freeUcs = now.modPar.getFreeUcs(ucInfo.ucSizes, now.dim, maxDim); |
| 894 |
// here something may change, therefore: |
|
| 895 | 135720x |
now.freeCovs = now.modPar.getFreeCovs(fpInfo, now.freeUcs, now.dim, maxDim); |
| 896 | 135720x |
if (now.dim == maxDim) |
| 897 |
{
|
|
| 898 |
now.birthprob = 0; now.deathprob = now.moveprob = (now.modPar.fpSize > 0) ? 1.0 / 3.0 : 0.5; |
|
| 899 |
} |
|
| 900 |
else |
|
| 901 |
{
|
|
| 902 |
now.birthprob = now.deathprob = now.moveprob = (now.modPar.fpSize > 0) ? 0.25 : 1.0 / 3.0; |
|
| 903 |
} |
|
| 904 | 135720x |
logPropRatio = 0.0; |
| 905 |
} |
|
| 906 |
} else { // SWITCH (of FP vectors)
|
|
| 907 |
// select only the FP present covs |
|
| 908 | 25900x |
PosIntSet presentFps = removeElement(old.presentCovs, fpInfo.nFps + 1); |
| 909 | ||
| 910 |
// so we have the first power vector: |
|
| 911 | 25900x |
PosInt firstFpInd = *discreteUniform<PosIntSet>(presentFps); |
| 912 | 25900x |
Powers first = now.modPar.fpPars.at(firstFpInd - 1); |
| 913 | ||
| 914 |
// the second power vector from all other FPs |
|
| 915 | 25900x |
PosIntSet otherFps = removeElement(fpRange, firstFpInd); |
| 916 | 25900x |
PosInt secondFpInd = *discreteUniform<PosIntSet>(otherFps); |
| 917 | 25900x |
Powers second = now.modPar.fpPars.at(secondFpInd - 1); |
| 918 | ||
| 919 |
// save the first |
|
| 920 | 25900x |
Powers saveFirst = first; |
| 921 | ||
| 922 |
// copy second to first |
|
| 923 | 25900x |
now.modPar.fpPars.at(firstFpInd - 1) = second; |
| 924 | ||
| 925 |
// and save to second |
|
| 926 | 25900x |
now.modPar.fpPars.at(secondFpInd - 1) = saveFirst; |
| 927 | ||
| 928 |
// so now we have switched the power vectors. |
|
| 929 | ||
| 930 |
// move type probs are not changed, because the number of present FPs is unchanged, |
|
| 931 |
// as well as the dimension of the model. |
|
| 932 | ||
| 933 |
// but carefully update the information which covariates are free and which are present |
|
| 934 | 25900x |
now.freeCovs = now.modPar.getFreeCovs(fpInfo, now.freeUcs, now.dim, maxDim); |
| 935 | 25900x |
now.presentCovs = now.modPar.getPresentCovs(); |
| 936 | ||
| 937 |
// and the proposal ratio is 1, thus the log proposal ratio is 0: |
|
| 938 | 25900x |
logPropRatio = 0; |
| 939 |
} |
|
| 940 | ||
| 941 |
// search for log marg lik of proposed model |
|
| 942 | 501600x |
GlmModelInfo nowInfo = modelCache.getModelInfo(now.modPar); |
| 943 | ||
| 944 | 501600x |
if (R_IsNA(nowInfo.logMargLik)) |
| 945 |
{ // "now" is a new model
|
|
| 946 | ||
| 947 | 15040x |
double zMode = 0.0; |
| 948 | 15040x |
double zVar = 0.0; |
| 949 | 15040x |
double laplaceApprox = 0.0; |
| 950 | 15040x |
double residualDeviance = R_NaReal; |
| 951 | 15040x |
Cache cache; |
| 952 | ||
| 953 |
// so we must compute the log marg lik now. |
|
| 954 | 15040x |
now.logMargLik = getGlmVarLogMargLik(now.modPar, |
| 955 |
data, |
|
| 956 |
fpInfo, |
|
| 957 |
ucInfo, |
|
| 958 |
fixInfo, |
|
| 959 |
bookkeep, |
|
| 960 |
config, |
|
| 961 |
gaussHermite, |
|
| 962 |
cache, |
|
| 963 |
zMode, |
|
| 964 |
zVar, |
|
| 965 |
laplaceApprox, |
|
| 966 |
residualDeviance); |
|
| 967 | ||
| 968 |
// check if the new model is OK |
|
| 969 | 15040x |
if (R_IsNaN(now.logMargLik)) |
| 970 |
{
|
|
| 971 |
// we do not save this model in the model cache |
|
| 972 | ! |
bookkeep.nanCounter++; |
| 973 |
} |
|
| 974 |
else |
|
| 975 |
{ // OK: then compute the rest, and insert into model cache
|
|
| 976 | ||
| 977 | 15040x |
now.logPrior = getVarLogPrior(now.modPar, |
| 978 |
fpInfo, |
|
| 979 |
ucInfo, |
|
| 980 |
fixInfo, |
|
| 981 |
bookkeep); |
|
| 982 | ||
| 983 |
// insert the model parameter/info into the model cache |
|
| 984 | ||
| 985 |
// problem: this could erase the old model from the model cache, |
|
| 986 |
// and invalidate the iterator old.mapPos! |
|
| 987 |
// ==> so we cannot work with the iterators here. |
|
| 988 | 15040x |
modelCache.insert(now.modPar, |
| 989 | 30080x |
GlmModelInfo(now.logMargLik, |
| 990 |
now.logPrior, |
|
| 991 |
cache, |
|
| 992 |
zMode, |
|
| 993 |
zVar, |
|
| 994 |
laplaceApprox, |
|
| 995 |
residualDeviance)); |
|
| 996 |
} |
|
| 997 |
} |
|
| 998 |
else // "now" is an old model |
|
| 999 |
{
|
|
| 1000 |
// extract log marg lik and prior from the modelInfo object |
|
| 1001 | 486560x |
now.logMargLik = nowInfo.logMargLik; |
| 1002 | 486560x |
now.logPrior = nowInfo.logPrior; |
| 1003 |
} |
|
| 1004 | ||
| 1005 |
// decide acceptance: |
|
| 1006 |
// for acceptance, the new model must be valid and the acceptance must be sampled |
|
| 1007 | 1003200x |
if ((R_IsNaN(now.logMargLik) == FALSE) && |
| 1008 | 501600x |
(unif_rand() <= exp(now.logMargLik - old.logMargLik + now.logPrior - old.logPrior + logPropRatio))) |
| 1009 |
{ // acceptance
|
|
| 1010 | 139810x |
old = now; |
| 1011 |
} |
|
| 1012 |
else |
|
| 1013 |
{ // rejection
|
|
| 1014 | 361790x |
now = old; |
| 1015 |
} |
|
| 1016 | ||
| 1017 |
// so now definitely old == now, and we can |
|
| 1018 |
// increment the associated sampling frequency. |
|
| 1019 | 501600x |
modelCache.incrementFrequency(now.modPar); |
| 1020 | ||
| 1021 |
// echo progress? |
|
| 1022 | 513200x |
if((++t % std::max(bookkeep.chainlength / 100, static_cast<PosLargeInt>(1)) == 0) && |
| 1023 | 11600x |
bookkeep.verbose) |
| 1024 |
{
|
|
| 1025 | 1600x |
Rprintf("-"); // display computation progress at each percent
|
| 1026 |
} |
|
| 1027 |
} |
|
| 1028 | ||
| 1029 | 140x |
PutRNGstate(); // no RNs required anymore |
| 1030 | ||
| 1031 | ||
| 1032 |
// normalize posterior probabilities and correct log marg lik and log prior |
|
| 1033 | 140x |
const long double logNormConst = modelCache.getLogNormConstant(); |
| 1034 | ||
| 1035 |
// get the nModels best models from the cache as an R list |
|
| 1036 |
List ret = modelCache.getListOfBestModels(fpInfo, |
|
| 1037 |
logNormConst, |
|
| 1038 | 140x |
bookkeep); |
| 1039 | ||
| 1040 |
// set the attributes |
|
| 1041 | 140x |
ret.attr("numVisited") = modelCache.size();
|
| 1042 | 140x |
ret.attr("inclusionProbs") = modelCache.getInclusionProbs(logNormConst, fpInfo.nFps, ucInfo.nUcGroups);
|
| 1043 | 140x |
ret.attr("logNormConst") = logNormConst;
|
| 1044 | ||
| 1045 | 140x |
if (bookkeep.verbose){
|
| 1046 | 40x |
Rprintf("\nNumber of non-identifiable model proposals: %lu", bookkeep.nanCounter);
|
| 1047 | 40x |
Rprintf("\nNumber of total cached models: %d", modelCache.size());
|
| 1048 | 40x |
Rprintf("\nNumber of returned models: %d\n", Rf_length(ret));
|
| 1049 |
} |
|
| 1050 | ||
| 1051 |
// and return |
|
| 1052 | 280x |
return ret; |
| 1053 |
} |
|
| 1054 | ||
| 1055 | ||
| 1056 |
// ***************************************************************************************************// |
|
| 1057 | ||
| 1058 | ||
| 1059 |
// Exhaustive model search: evaluates every model in the (FP x UC) model space,
// keeping the best ones in an ordered set, and returns them as an R list with
// posterior summaries attached as attributes.
List
glmExhaustive(const DataValues& data,
              const FpInfo& fpInfo,
              const UcInfo& ucInfo,
              const FixInfo& fixInfo,
              Book& bookkeep,
              const GlmModelConfig& config,
              const GaussHermite& gaussHermite)
{
    // no map needed for exhaustive search, a set is the right thing:
    set<Model> orderedModels;

    // check that the set is large enough for all models
    if (orderedModels.max_size() < data.totalNumber)
        Rf_error("\nmodel space is too large - cannot compute every model\n");

    // start model
    ModelPar startModel(fpInfo.nFps);

    // bookkeeping

    // for computation of inclusion probs:
    // array of IndexSafeSum objects.
    //const int cgwp_length = fpInfo.nFps + ucInfo.nUcGroups;

    //IndexSafeSum cgwp[cgwp_length];
    //IndexSafeSum cgwp[fpInfo.nFps + ucInfo.nUcGroups];
    //bookkeep.covGroupWisePosteriors = cgwp;

    // now with vectors instead of arrays:
    // one IndexSafeSum per FP term and per UC group, later read back below
    // via bookkeep.covGroupWisePosteriors[i].
    std::vector<IndexSafeSum> cgwp(fpInfo.nFps + ucInfo.nUcGroups);
    bookkeep.covGroupWisePosteriors = cgwp;

    // calculate the true null model if we have any fixed covariates,
    // otherwise it comes later (note: startModel has no fixPars yet here,
    // so this really is the model without the fixed covariates)
    if(fixInfo.nFixGroups != 0)
        computeGlm(startModel, orderedModels,
                   data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite);

    // add the fixed covariates to the model configuration:
    // groups are 1-based, hence i+1
    IntSet s;
    for (unsigned int i = 0; i < fixInfo.nFixGroups; ++i)
        s.insert(s.end(), i+1);
    startModel.fixPars = s;

    // start computation: recursively permute over all power/UC configurations
    glmPermPars(0, startModel, orderedModels,
                data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite);

    // we have finished.

    // now echo statistics:
    if (bookkeep.verbose)
    {
        Rprintf("\nActual number of possible models: %lu",
                bookkeep.modelCounter);
        Rprintf("\nNumber of non-identifiable models: %lu",
                bookkeep.nanCounter);
        Rprintf("\nNumber of saved possible models: %zu\n",
                orderedModels.size());
    }

    // normalize posterior probabilities of the models to return
    // (we do not know here the normalizing constant for the marginal likelihoods!)
    const long double logNormConst = bookkeep.modelLogPosteriors.logSumExp();

    // allocate the return list
    List ret(orderedModels.size());

    // and fill it:

    // first the single models, iterated in reverse so the best model
    // (largest in set order) comes first in the returned list
    R_len_t i = 0;
    for (set<Model>::const_reverse_iterator
            j = orderedModels.rbegin();
            j != orderedModels.rend();
            j++)
    {
        ret[i++] = j->convert2list(fpInfo,
                                   logNormConst,
                                   bookkeep);
    }

    // then some attributes:

    NumericVector inc(fpInfo.nFps + ucInfo.nUcGroups); // TODO should I add fix here
    for (R_len_t i = 0; i != inc.size(); ++i)
    {
        inc[i] = bookkeep.covGroupWisePosteriors[i].sumNormalizedExp(bookkeep.modelLogPosteriors, logNormConst);
    }
    ret.attr("inclusionProbs") = inc;
    ret.attr("numVisited") = static_cast<double>(bookkeep.modelCounter);
    ret.attr("logNormConst") = static_cast<double>(logNormConst);

    // finally return the list.
    return ret;
}
|
| 1158 | ||
| 1159 |
// ***************************************************************************************************// |
|
| 1160 | ||
| 1161 |
// 21/11/2012: add tbf option |
|
| 1162 |
// 03/12/2012: add Cox regression with tbfs |
|
| 1163 |
// 03/07/2013: remove nullModelInfo and only get nullModelLogMargLik |
|
| 1164 | ||
| 1165 |
// R call is: |
|
| 1166 |
// |
|
| 1167 |
//Ret <- |
|
| 1168 |
// .External (cpp_glmBayesMfp, |
|
| 1169 |
// data, |
|
| 1170 |
// fpInfos, |
|
| 1171 |
// ucInfos, |
|
| 1172 |
// fixInfos, |
|
| 1173 |
// searchConfig, |
|
| 1174 |
// distribution, |
|
| 1175 |
// options) |
|
| 1176 |
// [[Rcpp::export]] |
|
| 1177 |
// Main entry point called from R (via .External / Rcpp::export wrapper):
// unpacks the R argument lists, builds the C++-side data/configuration
// objects, sets the OpenMP thread count, and dispatches to one of the three
// search modes (fixed model list / stochastic sampling / exhaustive search).
SEXP
cpp_glmBayesMfp(List rcpp_data, List rcpp_fpInfos, List rcpp_ucInfos, List rcpp_fixInfos,
                List rcpp_searchConfig, List rcpp_distribution, List rcpp_options )
{
    // ----------------------------------------------------------------------------------
    // unpack the R objects
    // ----------------------------------------------------------------------------------

    // data: copy the R matrices/vectors into Armadillo objects
    const NumericMatrix n_x = rcpp_data["x"];
    const AMatrix x(n_x.begin(), n_x.nrow(),
                    n_x.ncol());

    const NumericMatrix n_xCentered = rcpp_data["xCentered"];
    const AMatrix xCentered(n_xCentered.begin(), n_xCentered.nrow(),
                            n_xCentered.ncol());

    const NumericVector n_y = rcpp_data["y"];
    const AVector y(n_y.begin(), n_y.size());

    const IntVector censInd = as<IntVector>(rcpp_data["censInd"]);

    // FP configuration:

    // vector of maximum fp degrees
    const PosIntVector fpmaxs = as<PosIntVector>(rcpp_fpInfos["fpmaxs"]);
    // corresponding vector of fp column indices
    const PosIntVector fppos = rcpp_fpInfos["fppos"];
    // corresponding vector of power set cardinalities
    const PosIntVector fpcards = rcpp_fpInfos["fpcards"];
    // names of fp terms
    const StrVector fpnames = rcpp_fpInfos["fpnames"];

    // UC configuration:

    const PosIntVector ucIndices = rcpp_ucInfos["ucIndices"];
    List rcpp_ucColList = rcpp_ucInfos["ucColList"];

    // one column-index vector per uncertain covariate group
    std::vector<PosIntVector> ucColList;
    for (R_len_t i = 0; i != rcpp_ucColList.length(); ++i)
    {
        ucColList.push_back(as<PosIntVector>(rcpp_ucColList[i]));
    }

    // Fixed covariate configuration:

    const PosIntVector fixIndices = rcpp_fixInfos["fixIndices"];
    List rcpp_fixColList = rcpp_fixInfos["fixColList"];

    // one column-index vector per fixed covariate group
    std::vector<PosIntVector> fixColList;
    for (R_len_t i = 0; i != rcpp_fixColList.length(); ++i)
    {
        fixColList.push_back(as<PosIntVector>(rcpp_fixColList[i]));
    }

    // model search configuration:

    const double totalNumber = as<double>(rcpp_searchConfig["totalNumber"]);
    const PosInt nModels = as<PosInt>(rcpp_searchConfig["nModels"]);
    const bool empiricalBayes = as<bool>(rcpp_searchConfig["empiricalBayes"]);
    const bool useFixedg = as<bool>(rcpp_searchConfig["useFixedg"]);
    const bool doSampling = as<bool>(rcpp_searchConfig["doSampling"]);
    const double chainlength = as<double>(rcpp_searchConfig["chainlength"]);
    const PosInt nCache = as<PosInt>(rcpp_searchConfig["nCache"]);
    const double largeVariance = as<double>(rcpp_searchConfig["largeVariance"]);
    const bool useBfgs = as<bool>(rcpp_searchConfig["useBfgs"]);
    const bool useFixedc = as<bool>(rcpp_searchConfig["useFixedc"]);

    // there might be a single model configuration saved in the searchConfig:
    // probe for the "modelConfigs" element; the flag is set optimistically and
    // reset in the catch handler when the element is absent.
    bool onlyComputeModelsInList;
    try {
        onlyComputeModelsInList = true;
        List rcpp_modelConfigs = rcpp_searchConfig["modelConfigs"];
    } catch (std::exception& e) {
        onlyComputeModelsInList = false;
    }
    // we need to try-catch it because there might be no element "modelConfigs" in
    // rcpp_searchConfig in which case Rcpp throws an exception.

    // distributions info:

    const std::string modelPrior = as<std::string>(rcpp_distribution["modelPrior"]);
    const bool doGlm = as<bool>(rcpp_distribution["doGlm"]);
    const bool tbf = as<bool>(rcpp_distribution["tbf"]);
    const double nullModelLogMargLik = as<double>(rcpp_distribution["nullModelLogMargLik"]);
    const double nullModelDeviance = as<double>(rcpp_distribution["nullModelDeviance"]);
    const double fixedg = as<double>(rcpp_distribution["fixedg"]);
    const double empiricalMean = as<double>(rcpp_distribution["yMean"]);
    const bool empiricalgPrior = as<bool>(rcpp_distribution["empiricalgPrior"]);

    S4 rcpp_gPrior = rcpp_distribution["gPrior"];
    List rcpp_family = rcpp_distribution["family"];

    // other options:

    const bool verbose = as<bool>(rcpp_options["verbose"]);
    const bool debug = as<bool>(rcpp_options["debug"]);
#ifdef _OPENMP
    const bool useOpenMP = as<bool>(rcpp_options["useOpenMP"]);
#endif
    const GaussHermite gaussHermite(as<List>(rcpp_options["gaussHermite"]));
    const bool higherOrderCorrection = as<bool>(rcpp_options["higherOrderCorrection"]);

    // ----------------------------------------------------------------------------------
    // further process input information
    // ----------------------------------------------------------------------------------

    // data:

    // only the intercept is always included, that is fixed, in the model
    IntSet fixedCols;
    fixedCols.insert(1); //vestigial code that isn't part of the new support for fixed vars

    const DataValues data(x, xCentered, y, censInd, totalNumber, fixedCols);

    // FP configuration:
    const FpInfo fpInfo(fpcards, fppos, fpmaxs, fpnames, x);

    // UC configuration:

    // determine sizes of the UC groups, and the total size == maximum size reached together by all
    // UC groups.
    PosIntVector ucSizes;
    PosInt maxUcDim = 0;
    for (vector<PosIntVector>::const_iterator cols = ucColList.begin(); cols != ucColList.end(); ++cols)
    {
        PosInt thisSize = cols->size();

        maxUcDim += thisSize;
        ucSizes.push_back(thisSize);
    }
    const UcInfo ucInfo(ucSizes, maxUcDim, ucIndices, ucColList);

    // Fix configuration:

    // determine sizes of the fixed covariate groups, and the total size == maximum size reached together by all
    // fix groups.
    PosIntVector fixSizes;
    PosInt maxFixDim = 0;
    for (vector<PosIntVector>::const_iterator cols = fixColList.begin(); cols != fixColList.end(); ++cols)
    {
        PosInt thisSize = cols->size();

        maxFixDim += thisSize;
        fixSizes.push_back(thisSize);
    }
    const FixInfo fixInfo(fixSizes, maxFixDim, fixIndices, fixColList);

    // model search configuration:
    Book bookkeep(tbf,
                  doGlm,
                  empiricalBayes,
                  useFixedg,
                  useFixedc,
                  chainlength,
                  doSampling,
                  verbose,
                  modelPrior,
                  nModels,
                  nCache,
                  largeVariance,
                  useBfgs,
                  debug,
                  higherOrderCorrection);

    // model configuration:
    const GlmModelConfig config(rcpp_family, nullModelLogMargLik, nullModelDeviance, fixedg, rcpp_gPrior,
                                data.response, bookkeep.debug, bookkeep.useFixedc, empiricalMean,
                                empiricalgPrior);

    // use only one thread if we do not want to use openMP.
#ifdef _OPENMP
    if(! useOpenMP)
    {
        omp_set_num_threads(1);
    } else {
        // else use all available cpu's.
        omp_set_num_threads(omp_get_num_procs());
    }
#endif

    // ----------------------------------------------------------------------------------
    // now either compute only one Model, do model sampling or do an exhaustive search
    // ----------------------------------------------------------------------------------

    if(onlyComputeModelsInList)
    {
        return glmModelsInList(data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite, as<List>(rcpp_searchConfig["modelConfigs"]));
    }
    else if(doSampling)
    {
        return glmSampling(data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite);
    }
    else
    {
        return glmExhaustive(data, fpInfo, ucInfo, fixInfo, bookkeep, config, gaussHermite);
    }
}
|
| 1410 | ||
| 1411 |
// ***************************************************************************************************// |
|
| 1412 | ||
| 1413 |
// End of file. |
| 1 |
/* |
|
| 2 |
* optimize.h |
|
| 3 |
* |
|
| 4 |
* Created on: 08.12.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef OPTIMIZE_H_ |
|
| 9 |
#define OPTIMIZE_H_ |
|
| 10 | ||
| 11 |
#include "types.h" |
|
| 12 | ||
| 13 |
// modified from R's scalar function "optimize" routine "Brent_fmin" |
|
| 14 |
// in /src/appl/fmin.c, |
|
| 15 |
// in order to take a functor instead of an ordinary function, and does not |
|
| 16 |
// take an info argument |
|
| 17 | ||
| 18 |
// original documentation: |
|
| 19 | ||
| 20 |
// returns an approximation x to the point where f attains a minimum on |
|
| 21 |
// the interval (ax,bx) is determined. |
|
| 22 |
// |
|
| 23 |
// INPUT.. |
|
| 24 |
// |
|
| 25 |
// ax left endpoint of initial interval |
|
| 26 |
// bx right endpoint of initial interval |
|
| 27 |
// f function which evaluates f(x, info) for any x |
|
| 28 |
// in the interval (ax,bx) |
|
| 29 |
// tol desired length of the interval of uncertainty of the final |
|
| 30 |
// result ( >= 0.) |
|
| 31 |
// |
|
| 32 |
// OUTPUT.. |
|
| 33 |
// |
|
| 34 |
// fmin abcissa approximating the point where f attains a minimum |
|
| 35 |
// |
|
| 36 |
// The method used is a combination of golden section search and |
|
| 37 |
// successive parabolic interpolation. convergence is never much slower |
|
| 38 |
// than that for a Fibonacci search. If f has a continuous second |
|
| 39 |
// derivative which is positive at the minimum (which is not at ax or |
|
| 40 |
// bx), then convergence is superlinear, and usually of the order of |
|
| 41 |
// about 1.324.... |
|
| 42 |
// The function f is never evaluated at two points closer together |
|
| 43 |
// than eps*abs(fmin)+(tol/3), where eps is approximately the square |
|
| 44 |
// root of the relative machine precision. if f is a unimodal |
|
| 45 |
// function and the computed values of f are always unimodal when |
|
| 46 |
// separated by at least eps*abs(x)+(tol/3), then fmin approximates |
|
| 47 |
// the abcissa of the global minimum of f on the interval ax,bx with |
|
| 48 |
// an error less than 3*eps*abs(fmin)+tol. if f is not unimodal, |
|
| 49 |
// then fmin may approximate a local, but perhaps non-global, minimum to |
|
| 50 |
// the same accuracy. |
|
| 51 |
// This function subprogram is a slightly modified version of the |
|
| 52 |
// Algol 60 procedure localmin given in Richard Brent, Algorithms for |
|
| 53 |
// Minimization without Derivatives, Prentice-Hall, Inc. (1973). |
|
| 54 | ||
| 55 | ||
| 56 |
// Wrapper class for Brent's scalar minimization (see file header: modified
// from R's Brent_fmin) taking a functor instead of a function pointer.
template <class Fun>
class Brent {
public:
    // setup the object
    // - function is the function to be maximized in the interval (lowerBound, upperBound)
    // - lowerBound: lower bound on the function argument
    // - upperBound: upper bound on the function argument
    // - precision: desired length of the interval of uncertainty of the final result ( > 0.0,
    //   default: fourth root of machine precision, since EPS is itself the
    //   square root of the machine precision)
    Brent(Fun& function,
          double lowerBound,
          double upperBound,
          double precision=sqrt(EPS)) : // EPS is the square root of the machine precision
          function(function), lowerBound(lowerBound),
          upperBound(upperBound), precision(precision)
    {
        // check arguments: all three errors abort back into R via Rf_error
        if (R_finite(lowerBound) == FALSE || R_finite(upperBound) == FALSE)
            Rf_error("Brent: bounds must be finite");

        if (lowerBound >= upperBound)
            Rf_error("Brent: lowerBound not smaller than upperBound");

        if (precision <= 0.0)
            Rf_error("Brent: precision not positive");
    }

    // minimize the function; returns the abscissa of the (local) minimum
    double
    minimize ();

private:
    // the function we want to maximize
    Fun& function;

    // constraints on the function argument
    const double lowerBound;
    const double upperBound;

    // settings
    const double precision;
};
|
| 98 | ||
| 99 |
// the actual minimization function |
|
| 100 |
// the actual minimization function: a near-verbatim port of R's Brent_fmin
// (src/appl/fmin.c). Combines golden-section search with successive parabolic
// interpolation; see the long comment block above for convergence guarantees.
// NOTE(review): statement order is the algorithm here — do not reorder.
template<class Fun>
double
Brent<Fun>::minimize()
{
    /* c is the squared inverse of the golden ratio */
    const double c = (3.0 - sqrt(5.0)) * 0.5;

    /* Local variables */
    double a, b, d, e, p, q, r, u, v, w, x;
    double t2, fu, fv, fw, fx, xm, eps, tol1, tol3;

    /* eps is approximately the square root of the relative machine precision. */
    eps = DBL_EPSILON;
    tol1 = eps + 1.0; /* the smallest 1.000... > 1 */
    eps = sqrt(eps);

    a = lowerBound;
    b = upperBound;

    /* initial point: golden-section point inside (a, b) */
    v = a + c * (b - a);
    w = v;
    x = v;

    d = 0.0; /* -Wall */
    e = 0.0;
    fx = function(x);
    fv = fx;
    fw = fx;
    tol3 = precision / 3.0;

    /* main loop starts here ----------------------------------- */

    for (;;)
    {
        xm = (a + b) * 0.5;
        tol1 = eps * fabs(x) + tol3;
        t2 = tol1 * 2.0;

        /* check stopping criterion */

        if (fabs(x - xm) <= t2 - (b - a) * 0.5)
            break;
        p = 0.0;
        q = 0.0;
        r = 0.0;
        if (fabs(e) > tol1)
        { /* fit parabola */

            r = (x - w) * (fx - fv);
            q = (x - v) * (fx - fw);
            p = (x - v) * q - (x - w) * r;
            q = (q - r) * 2.0;
            if (q > 0.0)
                p = - p;
            else
                q = - q;
            r = e;
            e = d;
        }

        /* take the parabolic step only if it is well-defined, stays inside
           (a, b), and moves less than half of the step before last */
        if (fabs(p) >= fabs(q * 0.5 * r) || p <= q * (a - x) || p >= q * (b - x))
        { /* a golden-section step */

            if (x < xm)
                e = b - x;
            else
                e = a - x;
            d = c * e;
        }
        else
        { /* a parabolic-interpolation step */

            d = p / q;
            u = x + d;

            /* f must not be evaluated too close to ax or bx */

            if (u - a < t2 || b - u < t2)
            {
                d = tol1;
                if (x >= xm)
                    d = - d;
            }
        }

        /* f must not be evaluated too close to x */

        if (fabs(d) >= tol1)
            u = x + d;
        else if (d > 0.)
            u = x + tol1;
        else
            u = x - tol1;

        fu = function(u);

        /* update a, b, v, w, and x */

        if (fu <= fx)
        {
            /* u is the new best point: shrink the bracket around it */
            if (u < x)
                b = x;
            else
                a = x;
            v = w;
            w = x;
            x = u;
            fv = fw;
            fw = fx;
            fx = fu;
        }
        else
        {
            /* x stays best; u only tightens the bracket and may become
               the second- or third-best point */
            if (u < x)
                a = u;
            else
                b = u;
            if (fu <= fw || w == x)
            {
                v = w;
                fv = fw;
                w = u;
                fw = fu;
            }
            else if (fu <= fv || v == x || v == w)
            {
                v = u;
                fv = fu;
            }
        }
    }
    /* end of main loop */

    // return the found minimum
    return x;
}
|
| 236 | ||
| 237 | ||
| 238 | ||
| 239 |
#endif /* OPTIMIZE_H_ */ |
| 1 |
/* |
|
| 2 |
* gpriors.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 22.11.2012 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 | ||
| 9 |
#include "gpriors.h" |
|
| 10 | ||
| 11 | ||
| 12 |
// compute M(a, b) |
|
| 13 |
static double |
|
| 14 | ! |
incInvGammaLogNormConst(double a, double b) |
| 15 |
{
|
|
| 16 |
double logNormConst; |
|
| 17 | ! |
if(b > 0) |
| 18 |
{
|
|
| 19 | ! |
logNormConst = a * log(b) - Rf_pgamma(b, a, 1.0, 1, 1) - Rf_lgammafn(a); |
| 20 |
} else {
|
|
| 21 | ! |
logNormConst = log(a); |
| 22 |
} |
|
| 23 | ! |
return logNormConst; |
| 24 |
} |
|
| 25 | ||
| 26 |
// Log prior density |
|
| 27 |
double |
|
| 28 | ! |
IncInvGammaGPrior::logDens(double g) const |
| 29 |
{
|
|
| 30 | ! |
return (- (a + 1.0) * log1p(g) - b / (g + 1.0) + incInvGammaLogNormConst(a, b)); |
| 31 |
} |
|
| 32 | ||
| 33 | ||
| 34 |
// for this class we have a closed form for the log marginal likelihood |
|
| 35 |
// resulting from the TBF approach |
|
| 36 |
double |
|
| 37 | ! |
IncInvGammaGPrior::getTBFLogMargLik(double residualDeviance, int df) const |
| 38 |
{
|
|
| 39 | ! |
return (incInvGammaLogNormConst(a, b) - |
| 40 | ! |
incInvGammaLogNormConst(a + df / 2.0, b + residualDeviance / 2.0) + |
| 41 | ! |
residualDeviance / 2.0); |
| 42 |
} |
| 1 |
// Native routine registration for the glmBfp shared library
// (see "Writing R Extensions", section on registering native routines).
#include <R.h>
#include <Rinternals.h>
#include <stdlib.h> // for NULL
#include <R_ext/Rdynload.h>

/* FIXME:
   Check these declarations against the C/Fortran source code.
*/

/* .Call calls: forward declarations of the exported entry points */
extern SEXP _glmBfp_cpp_bfgs(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _glmBfp_cpp_coxfit(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _glmBfp_cpp_evalZdensity(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _glmBfp_cpp_glmBayesMfp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _glmBfp_cpp_optimize(SEXP, SEXP, SEXP, SEXP);
extern SEXP _glmBfp_cpp_sampleGlm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP _glmBfp_predBMAcpp(SEXP, SEXP, SEXP);

/* registration table: {R-visible name, function pointer, argument count},
   terminated by the NULL sentinel entry */
static const R_CallMethodDef CallEntries[] = {
    {"_glmBfp_cpp_bfgs",          (DL_FUNC) &_glmBfp_cpp_bfgs,          6},
    {"_glmBfp_cpp_coxfit",        (DL_FUNC) &_glmBfp_cpp_coxfit,        5},
    {"_glmBfp_cpp_evalZdensity",  (DL_FUNC) &_glmBfp_cpp_evalZdensity,  7},
    {"_glmBfp_cpp_glmBayesMfp",   (DL_FUNC) &_glmBfp_cpp_glmBayesMfp,   7},
    {"_glmBfp_cpp_optimize",      (DL_FUNC) &_glmBfp_cpp_optimize,      4},
    {"_glmBfp_cpp_sampleGlm",     (DL_FUNC) &_glmBfp_cpp_sampleGlm,     9},
    {"_glmBfp_predBMAcpp",        (DL_FUNC) &_glmBfp_predBMAcpp,        3},
    {NULL, NULL, 0}
};

// Called by R when the package DLL is loaded: register the .Call routines
// and disallow dynamic symbol lookup (only registered symbols are visible).
void R_init_glmBfp(DllInfo *dll)
{
    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
    R_useDynamicSymbols(dll, FALSE);
}
| 1 |
// * 13/07/2015 Replace assert() with Rcpp::stop() |
|
| 2 | ||
| 3 | ||
| 4 |
#include "iwls.h" |
|
| 5 |
#include "types.h" |
|
| 6 |
#include "rcppExport.h" |
|
| 7 |
//#include <cassert> |
|
| 8 |
#include "design.h" |
|
| 9 |
#include <stdexcept> |
|
| 10 |
#include <sstream> |
|
| 11 |
#include "linalgInterface.h" |
|
| 12 | ||
| 13 |
// criterion for comparison of two Column vectors of the same size |
|
| 14 |
// max_j (abs(a_j - b_j) / (abs(b_j) + 0.01)) |
|
| 15 |
// this is similar to the criterion used by R's glm routine on the deviance scale. |
|
| 16 |
// However, we want to avoid the posterior scale because it would slow down the algorithm... |
|
| 17 |
// (computing this criterion is easier than computing the likelihood * prior) |
|
| 18 |
double |
|
| 19 | 194418x |
criterion(const AVector& a, const AVector& b) |
| 20 |
{
|
|
| 21 |
// check lengths |
|
| 22 |
//assert(a.n_elem == b.n_elem); |
|
| 23 |
if(a.n_elem != b.n_elem) Rcpp::stop("iwls.cpp:criterion: a.n_elem != b.n_elem");
|
|
| 24 |
|
|
| 25 |
// this will be the returned value |
|
| 26 | 194418x |
double ret = 0.0; |
| 27 | ||
| 28 |
// now iterate over the elements |
|
| 29 |
#pragma omp parallel for |
|
| 30 |
for (PosInt j = 0; j < a.n_elem; ++j) |
|
| 31 |
{
|
|
| 32 |
double tmp = fabs(a(j) - b(j)) / (fabs(b(j)) + 0.01); |
|
| 33 | ||
| 34 |
// note that the "critical" directive is vital here!! |
|
| 35 |
// Otherwise two executions of the same code can lead to different answers. |
|
| 36 |
#pragma omp critical |
|
| 37 |
ret = (tmp > ret) ? tmp : ret; /* fmax(ret, tmp); */ |
|
| 38 |
} |
|
| 39 | ||
| 40 |
// return the criterion value. |
|
| 41 | 194418x |
return ret; |
| 42 |
} |
|
| 43 | ||
| 44 | ||
| 45 |
// constructor: constructs the Iwls object for given model and data. |
|
| 46 | 910x |
// constructor: constructs the Iwls object for given model and data.
// Builds the design matrix for the model, and — unless the TBF methodology
// is used — precomputes the unscaled prior precision matrix R^-1 and the
// log determinant of its non-intercept submatrix via a Cholesky factorization.
Iwls::Iwls(const ModelPar &mod,
           const DataValues& data,
           const FpInfo& fpInfo,
           const UcInfo& ucInfo,
           const FixInfo& fixInfo,
           const GlmModelConfig& config,
           const AVector& linPredStart,
           bool conditional,
           double epsilon,
           bool debug,
           bool tbf) :
    design(getDesignMatrix(mod, data, fpInfo, ucInfo, fixInfo)),
    nCoefs(design.n_cols),
    isNullModel(nCoefs == 1),   // only the intercept column
    useFixedZ(conditional),
    nObs(design.n_rows),
    // not possible because this could be the null model: designWithoutIntercept(nObs, nCoefs - 1),
    response(data.response),
    config(config),
    invSqrtDispersions(1.0 / arma::sqrt(config.dispersions)),
    unscaledPriorPrec(nCoefs, nCoefs),
    results(linPredStart, nCoefs),
    epsilon(epsilon),
    // verbose(debug),
    tbf(tbf)
{
    // only do additional computations if not the TBF methodology is used
    if(! tbf)
    {
        if(! isNullModel)
        {
            AMatrix scaledDesignWithoutInterceptCrossprod;

            if(!config.empiricalgPrior){
                // Scaled design matrix without the intercept.
                // This will be diag(dispersions)^(-1/2) * design[, -1]
                // (attention with 0-based indexing of Armadillo objects!)
                AMatrix scaledDesignWithoutIntercept = arma::diagmat(invSqrtDispersions) * design.cols(1, nCoefs - 1);

                // then the log of the determinant of B'(dispersions)^(-1)B, which is part of the submatrix of R^-1
                // (we know that B'(dispersions)^(-1)B is positive definite, so we do not need to check the sign of the determinant)
                scaledDesignWithoutInterceptCrossprod = arma::trans(scaledDesignWithoutIntercept) * scaledDesignWithoutIntercept;
            }

            AMatrix infoMatrix;
            if(config.empiricalgPrior)
            {
                IwlsResults resultsFisher = results;

                unscaledPriorPrec.zeros();
                infoMatrix = getInformation(100,
                                            nObs,
                                            invSqrtDispersions,
                                            resultsFisher,
                                            config,
                                            response,
                                            design,
                                            epsilon,
                                            unscaledPriorPrec
                                            ); //an alternative to using the X'X covariance matrix

                scaledDesignWithoutInterceptCrossprod = infoMatrix.submat(1, 1, nCoefs - 1, nCoefs - 1);
            }

            // input that (the main ingredient) into the unscaled R^-1
            // but first be sure there are zeroes anywhere else:
            unscaledPriorPrec.zeros();

            unscaledPriorPrec.submat(1, 1, nCoefs - 1, nCoefs - 1) = scaledDesignWithoutInterceptCrossprod / config.cfactor;

            // Rcpp::Rcout << "Prior Prec:\n"<< unscaledPriorPrec << std::endl;

            // if(config.empiricalgPrior) unscaledPriorPrec.submat(1, 1, nCoefs - 1, nCoefs - 1) = infoMatrix.submat(1, 1, nCoefs - 1, nCoefs - 1);

            // now directly use the cholesky routine to avoid copying too much unnecessarily;
            // the crossprod matrix is overwritten by its Cholesky factor
            int info = potrf(false,
                             scaledDesignWithoutInterceptCrossprod);

            // check that all went well
            if(info != 0)
            {
                std::ostringstream stream;
                stream << "dpotrf(scaledDesignWithoutInterceptCrossprod) got error code " << info << "in Iwls constructor";
                throw std::domain_error(stream.str().c_str());
            }
            // now scaledDesignWithoutInterceptCrossprod contains the cholesky factor!

            // also do not copy the cholesky factor saved now in nonInterceptDesignCrossprod into an extra matrix,
            // but use the Triangular view: log det = 2 * sum(log(diag(chol factor)))
            logScaledDesignWithoutInterceptCrossprodDeterminant =
                2.0 * arma::as_scalar(arma::sum(arma::log(arma::diagvec(scaledDesignWithoutInterceptCrossprod))));
        } else {
            // this is the null model

            // be sure that this is correct:
            unscaledPriorPrec(0, 0) = 0.0;
        }
    }
}
|
| 147 | ||
| 148 |
// 03/07/2013: add offsets |
|
| 149 | ||
| 150 |
// do the Iwls algorithm for a given covariance factor g and start linear predictor linPred, |
|
| 151 |
// until convergence or until the maximum number of iterations is reached. |
|
| 152 |
// so also only one iwls step can be performed with this function. |
|
| 153 |
// Note that the linear predictor is the sum of X^T * beta and the vector of offsets. |
|
| 154 |
// returns the number of iterations. |
|
| 155 |
PosInt |
|
| 156 | 106057x |
Iwls::startWithLastLinPred(PosInt maxIter, |
| 157 |
double g) |
|
| 158 |
{
|
|
| 159 |
// initialize iteration counter and stopping criterion |
|
| 160 | 106057x |
PosInt iter = 0; |
| 161 | 106057x |
bool converged = false; |
| 162 | ||
| 163 |
// do IWLS for at most 30 iterations and unfulfilled stopping criterion |
|
| 164 | 406532x |
while ((iter++ < maxIter) && (! converged)) |
| 165 |
{
|
|
| 166 |
// compute the pseudo-observations and corresponding sqrt(weights) from the linear predictor |
|
| 167 | 300475x |
AVector pseudoObs(nObs); |
| 168 | 300475x |
AVector sqrtWeights(invSqrtDispersions); |
| 169 | ||
| 170 |
#pragma omp parallel for |
|
| 171 |
for(PosInt i = 0; i < nObs; ++i) |
|
| 172 |
{
|
|
| 173 |
double mu = config.link->linkinv(results.linPred(i)); |
|
| 174 |
double dmudEta = config.link->mu_eta(results.linPred(i)); |
|
| 175 | ||
| 176 |
pseudoObs(i) = results.linPred(i) - config.offsets(i) + (response(i) - mu) / dmudEta; |
|
| 177 |
sqrtWeights(i) *= dmudEta / sqrt(config.distribution->variance(mu)); |
|
| 178 |
} |
|
| 179 | ||
| 180 |
// calculate X'sqrt(W), which is needed twice |
|
| 181 | 901425x |
AMatrix XtsqrtW = arma::trans(design) * arma::diagmat(sqrtWeights); |
| 182 | ||
| 183 |
// calculate the precision matrix Q by doing a rank update: |
|
| 184 |
// if full Bayes is used, then: |
|
| 185 |
// Q = tcrossprod(X'sqrt(W)) + 1/g * unscaledPriorPrec |
|
| 186 |
// if TBF are used, then: |
|
| 187 |
// Q = tcrossprod(X'sqrt(W)) |
|
| 188 |
double scaleFactor = tbf ? 0.0 : 1.0 / g; |
|
| 189 | 300475x |
results.qFactor = unscaledPriorPrec; |
| 190 | 300475x |
syrk(false, |
| 191 |
false, |
|
| 192 |
XtsqrtW, |
|
| 193 |
scaleFactor, |
|
| 194 | 300475x |
results.qFactor); |
| 195 | ||
| 196 |
// decompose into Cholesky factor, Q = LL': |
|
| 197 | 600950x |
int info = potrf(false, |
| 198 | 300475x |
results.qFactor); |
| 199 | ||
| 200 |
// check that no error occured |
|
| 201 | 300475x |
if(info != 0) |
| 202 |
{
|
|
| 203 | ! |
std::ostringstream stream; |
| 204 | ! |
stream << "Cholesky factorization Q = LL' got error code " << info << |
| 205 | ! |
" in IWLS iteration " << iter; |
| 206 | ! |
if(! tbf) |
| 207 |
{
|
|
| 208 | ! |
stream << " for z=" << ::log(g); |
| 209 |
} |
|
| 210 | ! |
throw std::domain_error(stream.str().c_str()); |
| 211 |
} |
|
| 212 | ||
| 213 |
// save the old coefficients vector |
|
| 214 | 300475x |
AVector coefs_old = results.coefs; |
| 215 | ||
| 216 |
// the rhs of the equation Q * m = rhs or R'R * m = rhs |
|
| 217 | 300475x |
pseudoObs = arma::diagmat(sqrtWeights) * pseudoObs; |
| 218 | 300475x |
results.coefs = XtsqrtW * pseudoObs; |
| 219 |
// note that we have some steps to go until the computation |
|
| 220 |
// of results.coefs is finished! |
|
| 221 | ||
| 222 |
// forward-backward solve LL' * v = rhs |
|
| 223 | 600950x |
info = potrs(false, |
| 224 | 300475x |
results.qFactor, |
| 225 |
results.coefs); |
|
| 226 | ||
| 227 |
// check that no error occured |
|
| 228 | 300475x |
if(info != 0) |
| 229 |
{
|
|
| 230 | ! |
std::ostringstream stream; |
| 231 | ! |
stream << "Forward-backward solve got error code " << info << |
| 232 | ! |
" in IWLS iteration " << iter; |
| 233 | ! |
if(! tbf) |
| 234 |
{
|
|
| 235 | ! |
stream << " for z=" << ::log(g); |
| 236 |
} |
|
| 237 | ! |
throw std::domain_error(stream.str().c_str()); |
| 238 |
} |
|
| 239 | ||
| 240 |
// the new linear predictor is |
|
| 241 | 901425x |
results.linPred = design * results.coefs + config.offsets; |
| 242 | ||
| 243 |
// compare on the coefficients scale, but not in the first iteration where |
|
| 244 |
// it is not clear from where coefs_old came. Be safe and always |
|
| 245 |
// decide for non-convergence in this case. |
|
| 246 | 300475x |
converged = (iter > 1) ? (criterion(coefs_old, results.coefs) < epsilon) : false; |
| 247 |
} |
|
| 248 | ||
| 249 |
// do not (!) |
|
| 250 |
// warn if IWLS did not converge within the maximum number of iterations |
|
| 251 |
// because the maximum number of iterations can be set by user of this function. |
|
| 252 | ||
| 253 |
// compute log precision determinant |
|
| 254 | 318171x |
results.logPrecisionDeterminant = 2.0 * arma::as_scalar(arma::sum(arma::log(arma::diagvec(results.qFactor)))); |
| 255 | ||
| 256 |
// last but not least return the number of iterations |
|
| 257 | 106057x |
return iter; |
| 258 |
} |
|
| 259 | ||
| 260 |
// do the Iwls algorithm for a given covariance factor g and new start linear predictor |
|
| 261 |
// linPredStart. |
|
| 262 |
PosInt |
|
| 263 | 15407x |
Iwls::startWithNewLinPred(PosInt maxIter, |
| 264 |
double g, |
|
| 265 |
const AVector& linPredStart) |
|
| 266 |
{
|
|
| 267 |
// copy start value into linear predictor of the iwls object |
|
| 268 | 15407x |
results.linPred = linPredStart; |
| 269 | ||
| 270 |
// then run the iwls algorithm |
|
| 271 | 15407x |
return startWithLastLinPred(maxIter, g); |
| 272 |
} |
|
| 273 | ||
| 274 |
// 03/07/2013: add offsets |
|
| 275 | ||
| 276 |
// do the Iwls algorithm for a given covariance factor g and new start coefficients vector |
|
| 277 |
// coefsStart. |
|
| 278 |
PosInt |
|
| 279 | 14700x |
Iwls::startWithNewCoefs(PosInt maxIter, |
| 280 |
double g, |
|
| 281 |
const AVector& coefsStart) |
|
| 282 |
{
|
|
| 283 |
// start with new linpred deriving from the coefs |
|
| 284 | 44100x |
return startWithNewLinPred(maxIter, g, design * coefsStart + config.offsets); |
| 285 |
} |
|
| 286 | ||
| 287 |
// 03/07/2013: add offsets |
|
| 288 | ||
| 289 |
// compute the log of the (unnormalized) |
|
| 290 |
// posterior density for a given parameter consisting of the coefficients vector and z. |
|
| 291 |
// |
|
| 292 |
// Note that it is important to incorporate all model-depending constants here, |
|
| 293 |
// because this function is also used in the Chib-Jeliazkov marginal likelihood estimation, |
|
| 294 |
// comparing different models!! |
|
| 295 |
// |
|
| 296 |
// useFixedZ: is the log-covariance factor z fixed? Then the conditional posterior |
|
| 297 |
// density of the coefficients vector is returned (so the prior of z is not included). |
|
| 298 |
double |
|
| 299 | 98007x |
Iwls::computeLogUnPosteriorDens(const Parameter& sample) const |
| 300 |
{
|
|
| 301 |
// compute the sample of the linear predictor: |
|
| 302 | 294021x |
AVector linPredSample = design * sample.coefs + config.offsets; |
| 303 | ||
| 304 |
// compute the resulting mean vector from the linear predictor via the response function |
|
| 305 | 98007x |
AVector meansSample(linPredSample.n_elem); |
| 306 | ||
| 307 |
#pragma omp parallel for |
|
| 308 |
for(PosInt i = 0; i < meansSample.n_elem; ++i) |
|
| 309 |
{
|
|
| 310 |
meansSample(i) = config.link->linkinv(linPredSample(i)); |
|
| 311 |
} |
|
| 312 | ||
| 313 |
// start with the log likelihood of this coefficients, it is always included |
|
| 314 |
// this part is included in both cases because it does not depend on |
|
| 315 |
// the prior on the (non-intercept) effects: |
|
| 316 | ||
| 317 | 196014x |
double ret = config.distribution->loglik(meansSample.memptr()); |
| 318 | ||
| 319 |
// now it depends again on null model or not. |
|
| 320 | ||
| 321 | 98007x |
if(! isNullModel) |
| 322 |
{
|
|
| 323 |
// map z sample on the original g scale |
|
| 324 | 98007x |
double g = exp(sample.z); |
| 325 | ||
| 326 |
// calculate ||(dispersions)^(-1/2) * B * beta||^2 |
|
| 327 | 490035x |
AVector scaledBcoefsSample = arma::diagmat(invSqrtDispersions) * (linPredSample - sample.coefs(0)); |
| 328 | ||
| 329 |
// this avoids this multiplication of general matrices: |
|
| 330 |
// "DEVector scaledBcoefsSample = scaledDesignWithoutIntercept * sample.coefs(_(2, nCoefs));" |
|
| 331 | 98007x |
double scaledBcoefsSampleNormSquared = arma::dot(scaledBcoefsSample, scaledBcoefsSample); |
| 332 | ||
| 333 |
// now add the non-null model specific part, which comes from the prior on |
|
| 334 |
// the coefficients |
|
| 335 | 98007x |
ret += 0.5 * (logScaledDesignWithoutInterceptCrossprodDeterminant - |
| 336 | 98007x |
scaledBcoefsSampleNormSquared / (g * config.cfactor) - |
| 337 | 98007x |
(nCoefs - 1.0) * (2.0 * M_LN_SQRT_2PI + sample.z + log(config.cfactor))); |
| 338 | ||
| 339 | 98007x |
if(! useFixedZ) |
| 340 |
{
|
|
| 341 |
// and the log prior of this g |
|
| 342 | 98007x |
double logGPrior = config.gPrior->logDens(g); |
| 343 | ||
| 344 |
// add the log prior of z |
|
| 345 | 98007x |
ret += logGPrior + sample.z; |
| 346 | ||
| 347 |
// note that the sample.z has its origin in the density transformation from g to z. |
|
| 348 |
// if the prior on g was discrete and not continuous, this part would not be included in general. |
|
| 349 |
} |
|
| 350 |
} |
|
| 351 | ||
| 352 |
// return the correct value |
|
| 353 | 98007x |
return ret; |
| 354 |
} |
|
| 355 | ||
| 356 |
// compute the deviance of the current model, which in R is done by the glm.fit function |
|
| 357 |
// This is required to compute the TBF. |
|
| 358 |
// Note that this changes the Iwls object, because it iterates until convergence |
|
| 359 |
// or until the maximum number of iterations has been reached. |
|
| 360 |
double |
|
| 361 | ! |
Iwls::computeDeviance(PosInt maxIter) |
| 362 |
{
|
|
| 363 | ! |
PosInt iter = startWithLastLinPred(maxIter, R_PosInf); |
| 364 | ||
| 365 |
// if IWLS did not converge within the maximum number of iterations, |
|
| 366 |
// throw error which is then caught by getGlmVarLogMargLik later on |
|
| 367 | ! |
if(iter > maxIter) |
| 368 |
{
|
|
| 369 | ! |
std::ostringstream stream; |
| 370 | ! |
stream << "model could not be fitted within " << maxIter << |
| 371 | ! |
" IWLS iterations. This could be a result of separability in logistic regression, e.g."; |
| 372 | ! |
throw std::domain_error(stream.str().c_str()); |
| 373 |
} |
|
| 374 | ||
| 375 |
// compute the resulting mean vector from the linear predictor via the response function |
|
| 376 | ! |
AVector means(nObs); |
| 377 | ||
| 378 |
#pragma omp parallel for |
|
| 379 |
for(PosInt i = 0; i < nObs; ++i) |
|
| 380 |
{
|
|
| 381 |
means(i) = config.link->linkinv(results.linPred(i)); |
|
| 382 |
} |
|
| 383 | ||
| 384 |
// compute the log-likelihood |
|
| 385 | ! |
double logLik = config.distribution->loglik(means.memptr()); |
| 386 | ||
| 387 |
// return the deviance, which is just the scaled log-likelihood |
|
| 388 | ! |
double ret = - 2.0 * logLik; |
| 389 | ! |
return ret; |
| 390 |
} |
|
| 391 | ||
| 392 | ||
| 393 |
// Compute a standard GLM to get the observed Fisher Information to use as covariance matrix. |
|
| 394 |
AMatrix |
|
| 395 | ! |
Iwls::getInformation(PosInt maxIter, PosInt nObs, AVector invSqrtDispersions, IwlsResults results, |
| 396 |
const GlmModelConfig& config, const AVector& response, const AMatrix design, |
|
| 397 |
double epsilon, AMatrix unscaledPriorPrec) |
|
| 398 |
{
|
|
| 399 |
|
|
| 400 |
// initialize iteration counter and stopping criterion |
|
| 401 | ! |
PosInt iter = 0; |
| 402 | ! |
bool converged = false; |
| 403 |
|
|
| 404 | ! |
AVector finalWeights(invSqrtDispersions); |
| 405 |
|
|
| 406 |
// do IWLS for at most 30 iterations and unfulfilled stopping criterion |
|
| 407 | ! |
while ((iter++ < maxIter) && (! converged)) |
| 408 |
{
|
|
| 409 |
// compute the pseudo-observations and corresponding sqrt(weights) from the linear predictor |
|
| 410 | ! |
AVector pseudoObs(nObs); |
| 411 | ! |
AVector sqrtWeights(invSqrtDispersions); |
| 412 |
|
|
| 413 |
#pragma omp parallel for |
|
| 414 |
for(PosInt i = 0; i < nObs; ++i) |
|
| 415 |
{
|
|
| 416 |
double mu = config.link->linkinv(results.linPred(i)); |
|
| 417 |
double dmudEta = config.link->mu_eta(results.linPred(i)); |
|
| 418 |
|
|
| 419 |
pseudoObs(i) = results.linPred(i) - config.offsets(i) + (response(i) - mu) / dmudEta; |
|
| 420 |
sqrtWeights(i) *= dmudEta / sqrt(config.distribution->variance(mu)); |
|
| 421 |
} |
|
| 422 |
|
|
| 423 |
// calculate X'sqrt(W), which is needed twice |
|
| 424 | ! |
AMatrix XtsqrtW = arma::trans(design) * arma::diagmat(sqrtWeights); |
| 425 |
|
|
| 426 |
// calculate the precision matrix Q by doing a rank update: |
|
| 427 |
// if full Bayes is used, then: |
|
| 428 |
// Q = tcrossprod(X'sqrt(W)) + 1/g * unscaledPriorPrec |
|
| 429 |
// if TBF are used, then: |
|
| 430 |
// Q = tcrossprod(X'sqrt(W)) |
|
| 431 | ! |
double scaleFactor = 0.0; |
| 432 | ! |
results.qFactor = unscaledPriorPrec; //just zeros to set up matrix |
| 433 | ! |
syrk(false, |
| 434 |
false, |
|
| 435 |
XtsqrtW, |
|
| 436 |
scaleFactor, |
|
| 437 | ! |
results.qFactor); |
| 438 |
|
|
| 439 |
// decompose into Cholesky factor, Q = LL': |
|
| 440 | ! |
int info = potrf(false, |
| 441 | ! |
results.qFactor); |
| 442 |
|
|
| 443 |
// check that no error occured |
|
| 444 | ! |
if(info != 0) |
| 445 |
{
|
|
| 446 | ! |
std::ostringstream stream; |
| 447 | ! |
stream << "Cholesky factorization Q = LL' got error code " << info << |
| 448 | ! |
" in IWLS iteration " << iter; |
| 449 | ||
| 450 | ! |
throw std::domain_error(stream.str().c_str()); |
| 451 |
} |
|
| 452 |
|
|
| 453 |
// save the old coefficients vector |
|
| 454 | ! |
AVector coefs_old = results.coefs; |
| 455 |
|
|
| 456 |
// the rhs of the equation Q * m = rhs or R'R * m = rhs |
|
| 457 | ! |
pseudoObs = arma::diagmat(sqrtWeights) * pseudoObs; |
| 458 | ! |
results.coefs = XtsqrtW * pseudoObs; |
| 459 |
|
|
| 460 |
// note that we have some steps to go until the computation |
|
| 461 |
// of results.coefs is finished! |
|
| 462 |
|
|
| 463 |
// forward-backward solve LL' * v = rhs |
|
| 464 | ! |
info = potrs(false, |
| 465 | ! |
results.qFactor, |
| 466 |
results.coefs); |
|
| 467 |
|
|
| 468 |
// check that no error occured |
|
| 469 | ! |
if(info != 0) |
| 470 |
{
|
|
| 471 | ! |
std::ostringstream stream; |
| 472 | ! |
stream << "Forward-backward solve got error code " << info << |
| 473 | ! |
" in IWLS iteration " << iter; |
| 474 | ||
| 475 | ! |
throw std::domain_error(stream.str().c_str()); |
| 476 |
} |
|
| 477 |
|
|
| 478 |
// the new linear predictor is |
|
| 479 | ! |
results.linPred = design * results.coefs + config.offsets; |
| 480 |
|
|
| 481 |
// compare on the coefficients scale, but not in the first iteration where |
|
| 482 |
// it is not clear from where coefs_old came. Be safe and always |
|
| 483 |
// decide for non-convergence in this case. |
|
| 484 | ! |
converged = (iter > 1) ? (criterion(coefs_old, results.coefs) < epsilon) : false; |
| 485 | ! |
finalWeights = sqrtWeights; |
| 486 |
} |
|
| 487 |
|
|
| 488 |
// do not (!) |
|
| 489 |
// warn if IWLS did not converge within the maximum number of iterations |
|
| 490 |
// because the maximum number of iterations can be set by user of this function. |
|
| 491 |
|
|
| 492 |
// compute log precision determinant |
|
| 493 | ! |
results.logPrecisionDeterminant = 2.0 * arma::as_scalar(arma::sum(arma::log(arma::diagvec(results.qFactor)))); |
| 494 |
|
|
| 495 |
|
|
| 496 |
//to calculate the observed Information we need the dispersion |
|
| 497 |
double dispersion; |
|
| 498 | ! |
if( config.familyString == "poisson" || config.familyString == "binomial" ){
|
| 499 | ! |
dispersion = 1; |
| 500 |
} else {
|
|
| 501 | ! |
dispersion = dot(finalWeights, arma::square(response - results.linPred))/(nObs-results.coefs.n_elem); |
| 502 |
} |
|
| 503 |
|
|
| 504 |
|
|
| 505 | ! |
AMatrix observed = arma::trans(design) * arma::diagmat(arma::square(finalWeights)) * design / dispersion; |
| 506 |
|
|
| 507 |
|
|
| 508 | ! |
AVector residuals = arma::square(response - results.linPred); |
| 509 |
|
|
| 510 | ! |
return(observed); |
| 511 |
} |
| 1 |
/* |
|
| 2 |
* zdensity.h |
|
| 3 |
* |
|
| 4 |
* Created on: 16.11.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef IWLS_H_ |
|
| 9 |
#define IWLS_H_ |
|
| 10 | ||
| 11 |
#include "design.h" |
|
| 12 |
#include "types.h" |
|
| 13 | ||
| 14 | ||
| 15 |
// only one parameter set for the GLM |
|
| 16 |
struct Parameter |
|
| 17 |
{
|
|
| 18 |
// ctr |
|
| 19 | 12951x |
Parameter(const AVector& coefs, |
| 20 | 12951x |
double z) : |
| 21 | 12951x |
coefs(coefs), |
| 22 | 12951x |
z(z) |
| 23 |
{
|
|
| 24 |
} |
|
| 25 | ||
| 26 |
// default ctr |
|
| 27 | 41x |
Parameter(PosInt nCoefs) : |
| 28 | 41x |
coefs(nCoefs), |
| 29 | 41x |
z(R_NaReal) |
| 30 |
{
|
|
| 31 |
} |
|
| 32 | ||
| 33 |
// the sampled regression coefficients (including the intercept) |
|
| 34 |
AVector coefs; |
|
| 35 | ||
| 36 |
// the sampled log covariance factor |
|
| 37 |
double z; |
|
| 38 |
}; |
|
| 39 | ||
| 40 | ||
| 41 | ||
| 42 | ||
| 43 |
// all info from the iwls results |
|
| 44 |
struct IwlsResults |
|
| 45 |
{
|
|
| 46 |
// ctr |
|
| 47 | 650x |
IwlsResults(const AVector& linPred, |
| 48 | 650x |
PosInt nCoefs) : |
| 49 | 650x |
linPred(linPred), |
| 50 | 650x |
coefs(nCoefs), |
| 51 | 650x |
qFactor(nCoefs, nCoefs) |
| 52 |
{
|
|
| 53 |
} |
|
| 54 | ||
| 55 |
// default ctr |
|
| 56 | 41x |
IwlsResults(PosInt nObs, |
| 57 | 41x |
PosInt nCoefs) : |
| 58 | 41x |
linPred(nObs), |
| 59 | 41x |
coefs(nCoefs), |
| 60 | 41x |
qFactor(nCoefs, nCoefs) |
| 61 |
{
|
|
| 62 |
} |
|
| 63 | ||
| 64 |
// the linear predictor (including offsets!) |
|
| 65 |
AVector linPred; |
|
| 66 | ||
| 67 |
// the corresponding coefficient vector, |
|
| 68 |
AVector coefs; |
|
| 69 | ||
| 70 |
// lower-triangular Cholesky factor of the precision matrix |
|
| 71 |
AMatrix qFactor; |
|
| 72 | ||
| 73 |
// the log determinant of the precision matrix |
|
| 74 |
double logPrecisionDeterminant; |
|
| 75 |
}; |
|
| 76 | ||
| 77 | ||
| 78 |
// 03/07/2013: add offsets |
|
| 79 | ||
| 80 |
class Iwls {
|
|
| 81 | ||
| 82 |
public: |
|
| 83 |
// constructor: constructs the Iwls object for given model and data and |
|
| 84 |
// start linear predictor |
|
| 85 |
Iwls(const ModelPar &mod, |
|
| 86 |
const DataValues& data, |
|
| 87 |
const FpInfo& fpInfo, |
|
| 88 |
const UcInfo& ucInfo, |
|
| 89 |
const FixInfo& fixInfo, |
|
| 90 |
const GlmModelConfig& config, |
|
| 91 |
const AVector& linPredStart, |
|
| 92 |
bool conditional, |
|
| 93 |
double epsilon, |
|
| 94 |
bool debug, |
|
| 95 |
bool tbf); |
|
| 96 | ||
| 97 |
// do the Iwls algorithm for a given covariance factor g and current linear predictor |
|
| 98 |
// linPred, |
|
| 99 |
// until convergence or until the maximum number of iterations is reached. |
|
| 100 |
// so also only one iwls step can be performed with this function. |
|
| 101 |
// Note that the linear predictor is the sum of X^T * beta and the vector of offsets. |
|
| 102 |
// returns the number of iterations. |
|
| 103 |
PosInt |
|
| 104 |
startWithLastLinPred(PosInt maxIter, |
|
| 105 |
double g); |
|
| 106 | ||
| 107 |
// do the Iwls algorithm for a given covariance factor g and new start linear predictor |
|
| 108 |
// linPredStart. |
|
| 109 |
PosInt |
|
| 110 |
startWithNewLinPred(PosInt maxIter, |
|
| 111 |
double g, |
|
| 112 |
const AVector& linPredStart); |
|
| 113 | ||
| 114 |
// do the Iwls algorithm for a given covariance factor g and new start coefficients vector |
|
| 115 |
// coefsStart. |
|
| 116 |
PosInt |
|
| 117 |
startWithNewCoefs(PosInt maxIter, |
|
| 118 |
double g, |
|
| 119 |
const AVector& coefsStart); |
|
| 120 | ||
| 121 | ||
| 122 |
// compute the log of the (unnormalized) |
|
| 123 |
// posterior density for a given parameter consisting of the coefficients vector and z |
|
| 124 |
double |
|
| 125 |
computeLogUnPosteriorDens(const Parameter& sample) const; |
|
| 126 | ||
| 127 |
// compute the deviance of the current model, which in R is done by the glm.fit function |
|
| 128 |
// This is required to compute the TBF. |
|
| 129 |
// Note that this changes the Iwls object, because it iterates until convergence |
|
| 130 |
// or until the maximum number of iterations has been reached. |
|
| 131 |
double |
|
| 132 |
computeDeviance(PosInt maxIter); |
|
| 133 | ||
| 134 |
// getter for results |
|
| 135 |
IwlsResults |
|
| 136 | 15151x |
getResults() const |
| 137 |
{
|
|
| 138 | 15151x |
return results; |
| 139 |
} |
|
| 140 | ||
| 141 |
// Get the fisher information for the desired model |
|
| 142 |
AMatrix getInformation(PosInt maxIter, |
|
| 143 |
PosInt nObs, |
|
| 144 |
AVector invSqrtDispersions, |
|
| 145 |
IwlsResults results, |
|
| 146 |
const GlmModelConfig& config, |
|
| 147 |
const AVector& response, |
|
| 148 |
const AMatrix design, |
|
| 149 |
double epsilon, |
|
| 150 |
AMatrix unscaledPriorPrec |
|
| 151 |
); |
|
| 152 |
|
|
| 153 |
// this can be public: |
|
| 154 | ||
| 155 |
// design matrix for this model |
|
| 156 |
const AMatrix design; |
|
| 157 | ||
| 158 |
// dimension including the intercept ("ncol(design)" in R syntax)
|
|
| 159 |
const PosInt nCoefs; |
|
| 160 | ||
| 161 |
// is this the null model? |
|
| 162 |
const bool isNullModel; |
|
| 163 | ||
| 164 |
// use a fixed z? |
|
| 165 |
const bool useFixedZ; |
|
| 166 | ||
| 167 |
// number of observations |
|
| 168 |
const PosInt nObs; |
|
| 169 | ||
| 170 | ||
| 171 |
private: |
|
| 172 | ||
| 173 | ||
| 174 |
// the log of the determinant of the crossproduct of scaledDesignWithoutIntercept |
|
| 175 |
// This is B'(dispersions)^(-1)B. |
|
| 176 |
double logScaledDesignWithoutInterceptCrossprodDeterminant; |
|
| 177 | ||
| 178 |
// the response vector |
|
| 179 |
const AVector& response; |
|
| 180 | ||
| 181 |
// also needed in function calls (mean, variance and other glm functions) |
|
| 182 |
const GlmModelConfig& config; |
|
| 183 | ||
| 184 |
// This is diag(dispersions)^(-1/2), use diagmat to interpret it as diagonal matrix! |
|
| 185 |
AVector invSqrtDispersions; |
|
| 186 | ||
| 187 |
// unscaled prior precision matrix R^-1 = blockDiag(0, 1/c B'(dispersions)^(-1)B) (without g^-1) |
|
| 188 |
AMatrix unscaledPriorPrec; |
|
| 189 | ||
| 190 |
// container for the results of the iwls computation: |
|
| 191 |
IwlsResults results; |
|
| 192 | ||
| 193 |
// the convergence epsilon |
|
| 194 |
const double epsilon; |
|
| 195 | ||
| 196 |
// status messages?? |
|
| 197 |
// const bool verbose; |
|
| 198 | ||
| 199 |
// use TBF methodology? |
|
| 200 |
const bool tbf; |
|
| 201 |
}; |
|
| 202 | ||
| 203 | ||
| 204 |
#endif /* IWLS_H_ */ |
| 1 |
/* |
|
| 2 |
* linalgInterface.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 10.12.2010 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#ifndef USE_FC_LEN_T |
|
| 9 |
# define USE_FC_LEN_T |
|
| 10 |
#endif |
|
| 11 | ||
| 12 |
#include <cassert> |
|
| 13 |
#define ARMA_BLAS_CAPITALS |
|
| 14 |
#define ARMA_LAPACK_CAPITALS |
|
| 15 |
#include "linalgInterface.h" |
|
| 16 | ||
| 17 |
#include <Rconfig.h> |
|
| 18 |
#include <R_ext/Lapack.h> |
|
| 19 |
#include <R_ext/BLAS.h> |
|
| 20 | ||
| 21 |
#ifndef FCONE |
|
| 22 |
# define FCONE |
|
| 23 |
#endif |
|
| 24 | ||
| 25 |
// constants: |
|
| 26 |
static const double doubleOne = 1.0; |
|
| 27 |
static const int intOne = 1; |
|
| 28 | ||
| 29 | ||
| 30 | ||
| 31 |
// triangular solve of L * x = R |
|
| 32 |
// where L can be provided lower or upper-triangular and be transposed. |
|
| 33 |
// the solution is directly written into R. |
|
| 34 |
void |
|
| 35 | 41902200x |
trs(const bool upper, |
| 36 |
const bool transpose, |
|
| 37 |
const AMatrix& L, |
|
| 38 |
AMatrix& R) |
|
| 39 |
{
|
|
| 40 | 41902200x |
const char* side = "L"; |
| 41 |
const char* uplo = upper ? "U" : "L"; |
|
| 42 | 41902200x |
const char* transa = transpose ? "T" : "N"; |
| 43 | 41902200x |
const char* diag = "N"; |
| 44 | ||
| 45 | 41902200x |
const int m = R.n_rows; |
| 46 | 41902200x |
const int n = R.n_cols; |
| 47 |
assert(static_cast<PosInt>(m) == L.n_rows); |
|
| 48 | ||
| 49 | 83804400x |
F77_CALL(dtrsm)(side, |
| 50 |
uplo, |
|
| 51 |
transa, |
|
| 52 |
diag, |
|
| 53 |
& m, |
|
| 54 |
& n, |
|
| 55 |
& doubleOne, |
|
| 56 |
L.memptr(), |
|
| 57 |
& m, |
|
| 58 |
R.memptr(), |
|
| 59 |
& m FCONE FCONE FCONE FCONE); |
|
| 60 |
} |
|
| 61 | ||
| 62 |
// Cholesky decomposition of (symmetric) A, |
|
| 63 |
// which can either be provided in lower or upper-triangular storage. |
|
| 64 |
// Result will be in the same layout. |
|
| 65 |
// Returns an error code which should be zero if all went well. |
|
| 66 |
int |
|
| 67 | 258810x |
potrf(const bool upper, |
| 68 |
AMatrix& A) |
|
| 69 |
{
|
|
| 70 |
const char* uplo = upper ? "U" : "L"; |
|
| 71 | 258810x |
const int n = A.n_rows; |
| 72 |
assert(A.is_square()); |
|
| 73 | 258810x |
int info = 0; |
| 74 | ||
| 75 | 258810x |
F77_CALL(dpotrf)(uplo, |
| 76 |
& n, |
|
| 77 |
A.memptr(), |
|
| 78 |
& n, |
|
| 79 |
& info FCONE); |
|
| 80 | ||
| 81 | 258810x |
return info; |
| 82 |
} |
|
| 83 | ||
| 84 |
// Solve the system LL' * x = R |
|
| 85 |
// Instead of lower-triangular L also the upper-triangular form can be provided. |
|
| 86 |
// The solution is directly written into R. |
|
| 87 |
// Returns an error code which should be zero if all went well. |
|
| 88 |
int |
|
| 89 | 257790x |
potrs(const bool upper, |
| 90 |
const AMatrix& L, |
|
| 91 |
AMatrix& R) |
|
| 92 |
{
|
|
| 93 |
const char* uplo = upper ? "U" : "L"; |
|
| 94 | 257790x |
const int n = L.n_rows; |
| 95 |
assert(L.is_square()); |
|
| 96 |
assert(static_cast<PosInt>(n) == R.n_rows); |
|
| 97 | 257790x |
const int nrhs = R.n_cols; |
| 98 | 257790x |
int info = 0; |
| 99 | ||
| 100 | 515580x |
F77_CALL(dpotrs)(uplo, |
| 101 |
& n, |
|
| 102 |
& nrhs, |
|
| 103 |
L.memptr(), |
|
| 104 |
& n, |
|
| 105 |
R.memptr(), |
|
| 106 |
& n, |
|
| 107 |
& info FCONE); |
|
| 108 | ||
| 109 | 257790x |
return info; |
| 110 |
} |
|
| 111 | ||
| 112 |
// Do a symmetric rank k update of C: |
|
| 113 |
// C := A * A' + beta * C |
|
| 114 |
// where C must be symmetric and can be provided in lower or upper-triangular storage. |
|
| 115 |
// Result will be in the same layout. |
|
| 116 |
// If transpose == true, then the crossproduct will be added, i.e., C := A' * A + beta*C. |
|
| 117 |
void |
|
| 118 | 257550x |
syrk(const bool upper, |
| 119 |
const bool transpose, |
|
| 120 |
const AMatrix& A, |
|
| 121 |
const double beta, |
|
| 122 |
AMatrix& C) |
|
| 123 |
{
|
|
| 124 |
const char* uplo = upper ? "U" : "L"; |
|
| 125 |
const char* trans = transpose ? "T" : "N"; |
|
| 126 | 257550x |
const int n = C.n_rows; |
| 127 |
assert(C.is_square()); |
|
| 128 |
const int k = transpose ? A.n_rows : A.n_cols; |
|
| 129 |
const int lda = transpose ? k : n; |
|
| 130 | ||
| 131 | 515100x |
F77_CALL(dsyrk)(uplo, |
| 132 |
trans, |
|
| 133 |
& n, |
|
| 134 |
& k, |
|
| 135 |
& doubleOne, |
|
| 136 |
A.memptr(), |
|
| 137 |
& lda, |
|
| 138 |
& beta, |
|
| 139 |
C.memptr(), |
|
| 140 |
& n FCONE FCONE); |
|
| 141 |
} |
|
| 142 | ||
| 143 |
// Do a triangular matrix-vector multiplication: |
|
| 144 |
// x := A*x, or x := A'*x, |
|
| 145 |
// |
|
| 146 |
// where x is an n element vector and A is an n by n upper or lower triangular matrix. |
|
| 147 |
void |
|
| 148 | 13200x |
trmv(const bool upper, |
| 149 |
const bool transpose, |
|
| 150 |
const AMatrix& A, |
|
| 151 |
AVector& x) |
|
| 152 |
{
|
|
| 153 |
const char* uplo = upper ? "U" : "L"; |
|
| 154 |
const char* trans = transpose ? "T" : "N"; |
|
| 155 | 13200x |
const char* diag = "N"; |
| 156 | 13200x |
const int n = A.n_rows; |
| 157 | ||
| 158 | 39600x |
F77_CALL(dtrmv)(uplo, |
| 159 |
trans, |
|
| 160 |
diag, |
|
| 161 |
& n, |
|
| 162 |
A.memptr(), |
|
| 163 |
& n, |
|
| 164 |
x.memptr(), |
|
| 165 |
& intOne FCONE FCONE FCONE); |
|
| 166 |
} |
|
| 167 | ||
| 168 | ||
| 169 | ||
| 170 |
| 1 |
/* |
|
| 2 |
* optimize.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 08.12.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
*/ |
|
| 7 | ||
| 8 |
#include "optimize.h" |
|
| 9 |
#include "rcppExport.h" |
|
| 10 |
#include "functionWraps.h" |
|
| 11 | ||
| 12 |
using namespace Rcpp; |
|
| 13 | ||
| 14 |
// ***************************************************************************************************// |
|
| 15 | ||
| 16 |
// just an R interface to the brent optimization routine, for regression testing purposes. |
|
| 17 |
// [[Rcpp::export]] |
|
| 18 |
SEXP |
|
| 19 | 40x |
cpp_optimize(SEXP R_function, SEXP R_minx, SEXP R_maxx, SEXP R_precision) |
| 20 |
{
|
|
| 21 |
// extract function |
|
| 22 |
// R_interface = CDR(R_interface); |
|
| 23 |
// SEXP R_function = CAR(R_interface); |
|
| 24 |
// |
|
| 25 |
// // constraints on the argument |
|
| 26 |
// R_interface = CDR(R_interface); |
|
| 27 |
// SEXP R_minx = CAR(R_interface); |
|
| 28 |
// |
|
| 29 |
// R_interface = CDR(R_interface); |
|
| 30 |
// SEXP R_maxx= CAR(R_interface); |
|
| 31 |
// |
|
| 32 |
// // and the settings |
|
| 33 |
// R_interface = CDR(R_interface); |
|
| 34 |
// SEXP R_precision = CAR(R_interface); |
|
| 35 | ||
| 36 |
// wrap the R function to a cached function |
|
| 37 | 40x |
RFunction fun(R_function); |
| 38 | 40x |
CachedFunction<RFunction> cachedFun(fun); |
| 39 | ||
| 40 |
// then minimize (with constraints) |
|
| 41 |
Brent<CachedFunction<RFunction> > brent(cachedFun, |
|
| 42 |
Rf_asReal(R_minx), |
|
| 43 |
Rf_asReal(R_maxx), |
|
| 44 | 40x |
Rf_asReal(R_precision)); |
| 45 | ||
| 46 | 40x |
double xMin = brent.minimize(); |
| 47 | ||
| 48 |
// now the inverse Hessian at the minimum |
|
| 49 | 40x |
AccurateNumericInvHessian<CachedFunction<RFunction> > funInvHess(cachedFun); |
| 50 | 40x |
double invHessMin = funInvHess(xMin); |
| 51 | ||
| 52 |
// pack results into R list |
|
| 53 | 80x |
return List::create(_["par"] = xMin, |
| 54 | 80x |
_["inv.hessian"] = invHessMin, |
| 55 | 160x |
_["evaluations"] = cachedFun.getCache().convert2list()); |
| 56 |
} |
|
| 57 | ||
| 58 |
// ***************************************************************************************************// |
|
| 59 | ||
| 60 |
// End of file. |
| 1 |
#include <Rcpp.h> |
|
| 2 |
using namespace Rcpp; |
|
| 3 | ||
| 4 |
// [[Rcpp::export]] |
|
| 5 | 4x |
NumericMatrix predBMAcpp(NumericMatrix SurvMat, NumericMatrix LpMat, NumericVector WtVec) {
|
| 6 |
|
|
| 7 | 4x |
int nModels = SurvMat.nrow(); |
| 8 | 4x |
int nTimes = SurvMat.ncol(); |
| 9 | 4x |
int nObs = LpMat.nrow(); |
| 10 |
|
|
| 11 |
// arma::mat Pred(nTimes, nObs) //nTimes rows and nObs columns |
|
| 12 | 4x |
Rcpp::NumericMatrix Pred(nTimes, nObs); //nTimes rows and nObs columns |
| 13 |
|
|
| 14 | 8x |
for(int i=0; i < nModels; i++){
|
| 15 | 12x |
for(int t=0; t < nTimes; t++){
|
| 16 | 48x |
for(int Ob=0; Ob < nObs; Ob++){
|
| 17 | 40x |
double S = SurvMat(i,t); |
| 18 | 40x |
double eLP = LpMat(Ob,i); |
| 19 | 40x |
Pred(t,Ob) = Pred(t,Ob) + WtVec[i] * pow(S,eLP); |
| 20 |
} |
|
| 21 |
} |
|
| 22 |
//Rprintf("Model %d of %d",i,nModels);
|
|
| 23 |
} |
|
| 24 |
|
|
| 25 | 8x |
return Pred; |
| 26 |
} |
|
| 27 |
| 1 |
/* |
|
| 2 |
* sampleGlm.cpp |
|
| 3 |
* |
|
| 4 |
* Created on: 10.12.2009 |
|
| 5 |
* Author: daniel |
|
| 6 |
* |
|
| 7 |
* 13/07/2015 Replace assert() with Rccp:Stop() |
|
| 8 |
*/ |
|
| 9 | ||
| 10 |
#include "rcppExport.h" |
|
| 11 |
#include "combinatorics.h" |
|
| 12 |
#include "dataStructure.h" |
|
| 13 |
#include "types.h" |
|
| 14 |
#include "iwls.h" |
|
| 15 |
#include "design.h" |
|
| 16 |
#include "coxfit.h" |
|
| 17 |
#include "bfgs.h" |
|
| 18 |
#include "optimize.h" |
|
| 19 |
#include "fpUcHandling.h" |
|
| 20 |
#include "linalgInterface.h" |
|
| 21 |
//#include <cassert> |
|
| 22 | ||
| 23 |
#ifdef _OPENMP |
|
| 24 |
#include <omp.h> |
|
| 25 |
#endif |
|
| 26 | ||
| 27 |
using namespace Rcpp; |
|
| 28 | ||
| 29 |
// ***************************************************************************************************// |
|
| 30 | ||
| 31 |
struct MarginalZ |
|
| 32 |
{
|
|
| 33 | 82x |
MarginalZ(const RFunction& logDens, |
| 34 | 82x |
const RFunction& gen) : |
| 35 | 82x |
logDens(logDens), |
| 36 | 82x |
gen(gen) |
| 37 |
{
|
|
| 38 |
} |
|
| 39 | ||
| 40 | ||
| 41 |
const RFunction logDens; |
|
| 42 |
const RFunction gen; |
|
| 43 |
}; |
|
| 44 | ||
| 45 |
// ***************************************************************************************************// |
|
| 46 | ||
| 47 |
struct Options |
|
| 48 |
{
|
|
| 49 | 82x |
Options(bool estimateMargLik, |
| 50 |
bool verbose, |
|
| 51 |
bool debug, |
|
| 52 |
bool isNullModel, |
|
| 53 |
bool useFixedZ, |
|
| 54 |
bool tbf, |
|
| 55 |
bool doGlm, |
|
| 56 |
PosInt iterations, |
|
| 57 |
PosInt burnin, |
|
| 58 | 82x |
PosInt step) : |
| 59 | 82x |
estimateMargLik(estimateMargLik), verbose(verbose), |
| 60 | 82x |
debug(debug), isNullModel(isNullModel), useFixedZ(useFixedZ), |
| 61 | 82x |
tbf(tbf), doGlm(doGlm), |
| 62 | 82x |
nSamples(ceil((iterations - burnin) * 1.0 / step)), |
| 63 | 82x |
iterations(iterations), burnin(burnin), |
| 64 | 82x |
step(step) |
| 65 |
{
|
|
| 66 |
} |
|
| 67 | ||
| 68 |
const bool estimateMargLik; |
|
| 69 |
const bool verbose; |
|
| 70 |
const bool debug; |
|
| 71 |
const bool isNullModel; |
|
| 72 |
const bool useFixedZ; |
|
| 73 |
const bool tbf; |
|
| 74 |
const bool doGlm; |
|
| 75 | ||
| 76 |
const PosInt nSamples; |
|
| 77 |
const PosInt iterations; |
|
| 78 |
const PosInt burnin; |
|
| 79 |
const PosInt step; |
|
| 80 |
}; |
|
| 81 | ||
| 82 |
// ***************************************************************************************************// |
|
| 83 | ||
| 84 |
// this is just for being sure that the memory is returned correctly even in case |
|
| 85 |
// of an error or user interrupt from R. |
|
| 86 |
struct Fitter |
|
| 87 |
{
|
|
| 88 | 82x |
Fitter() : |
| 89 | 82x |
iwlsObject(0), |
| 90 | 82x |
coxfitObject(0) |
| 91 |
{}
|
|
| 92 | ||
| 93 | 82x |
~Fitter() |
| 94 |
{
|
|
| 95 | 82x |
delete iwlsObject; |
| 96 | 82x |
delete coxfitObject; |
| 97 |
} |
|
| 98 | ||
| 99 |
Iwls * iwlsObject; |
|
| 100 |
Coxfit * coxfitObject; |
|
| 101 |
}; |
|
| 102 | ||
| 103 | ||
| 104 | ||
| 105 |
// ***************************************************************************************************// |
|
| 106 | ||
| 107 | ||
| 108 |
// State container for one MCMC chain position: the current parameter
// sample, its unnormalized log posterior, and the Gaussian proposal
// information (IWLS results) for the sampled z.
class Mcmc
{

public:
    // ctr
    // Allocates a sample vector of length nCoefs and proposal info sized
    // nObs x nCoefs, and copies the marginal-z callbacks.
    // NOTE(review): logUnPosterior is left uninitialized here; callers are
    // expected to assign it before first use -- confirm against the
    // sampling loop.
    Mcmc(const MarginalZ& marginalz, PosInt nObs, PosInt nCoefs) :
        sample(nCoefs),
        proposalInfo(nObs, nCoefs),
        marginalz(marginalz)
    {
    }

    // the current parameter sample
    Parameter sample;

    // the unnormalized log posterior of this sample
    double logUnPosterior;

    // info about the normal proposal distribution given the sampled z
    IwlsResults proposalInfo;

    // compute the log of the normalized proposal density when the z log density is provided
    // normalize correctly in order not to get problems (perhaps) with the Chib-Jeliazkov estimate
    // computation.
    // Returns log N(sample.coefs | proposalInfo) + log p(sample.z), i.e.
    // the joint proposal density of (coefs, z), using the precision
    // Cholesky factor qFactor and its log determinant.
    double
    computeLogProposalDens() const
    {
        // Be careful: qFactor is in fact lower-triangular, so a simple multiplication would fail!
        // use instead directly a BLAS routine for this multiplication.
        // tmp is the residual coefs - proposal mean; trmv presumably
        // overwrites tmp with (transposed) qFactor * tmp in place --
        // confirm flag semantics against linalgInterface.
        AVector tmp = sample.coefs - proposalInfo.coefs;
        trmv(false, true, proposalInfo.qFactor, tmp);

        // Gaussian log density: 0.5*(log|Q| - t(r) Q r) - (d/2) log(2*pi),
        // plus the marginal log density of z evaluated via the R callback.
        return 0.5 * (proposalInfo.logPrecisionDeterminant - arma::dot(tmp, tmp)) -
                M_LN_SQRT_2PI * proposalInfo.qFactor.n_rows +
                marginalz.logDens(sample.z);
    }

    // non-default assignment operator
    // Needed because the const member "marginalz" suppresses the implicit
    // assignment operator; it deliberately copies everything EXCEPT
    // marginalz (which is identical across all Mcmc objects anyway).
    Mcmc&
    operator=(const Mcmc& rhs)
    {
        // self-assignment guard
        if(this == &rhs)
        {
            return *this;
        }
        else
        {
            sample = rhs.sample;
            logUnPosterior = rhs.logUnPosterior;
            proposalInfo = rhs.proposalInfo;

            return *this;
        }
    }


private:
    // the marginal z info: same for all Mcmc objects,
    // therefore it is not assigned by the assignment operator
    const MarginalZ marginalz;
    // important: copy the object, because otherwise (reference/pointer)
    // we are not sure that the functions are still available if we do not use "new"
};
|
| 171 | ||
| 172 |
// ***************************************************************************************************// |
|
| 173 | ||
| 174 | ||
| 175 |
struct Samples |
|
| 176 |
{
|
|
| 177 |
// constructor: basically allocates beta matrix |
|
| 178 | 82x |
Samples(PosInt nCoefs, PosInt nSamples) : |
| 179 | 82x |
coefsSamples(nCoefs, nSamples), |
| 180 | 82x |
nSaved(0) |
| 181 |
{
|
|
| 182 |
} |
|
| 183 | ||
| 184 |
// save a sample consisting of coefs and z |
|
| 185 |
void |
|
| 186 | 160700x |
storeParameters(const Parameter& sample) |
| 187 |
{
|
|
| 188 | 321400x |
coefsSamples.col(nSaved++) = sample.coefs; |
| 189 | 160700x |
zSamples.push_back(sample.z); |
| 190 |
} |
|
| 191 | ||
| 192 |
// save terms for marginal likelihood estimate |
|
| 193 |
void |
|
| 194 | 100x |
storeMargLikTerms(double num, double denom) |
| 195 |
{
|
|
| 196 | 100x |
numerator.push_back(num); |
| 197 | 100x |
denominator.push_back(denom); |
| 198 |
} |
|
| 199 | ||
| 200 |
// output everything to an R list |
|
| 201 |
List |
|
| 202 |
convert2list() const; |
|
| 203 | ||
| 204 |
private: |
|
| 205 |
// nCoefs x nSamples: |
|
| 206 |
AMatrix coefsSamples; |
|
| 207 | ||
| 208 |
// counts the number of saved parameters, so that we know where to store the |
|
| 209 |
// next coefficients vector |
|
| 210 |
PosInt nSaved; |
|
| 211 | ||
| 212 |
// is gradually extended: |
|
| 213 |
MyDoubleVector zSamples; |
|
| 214 | ||
| 215 |
// possibly stays empty if not required by the user: |
|
| 216 |
// the numerator and denominator terms for the marginal likelihood estimate |
|
| 217 |
MyDoubleVector numerator; |
|
| 218 |
MyDoubleVector denominator; |
|
| 219 |
}; |
|
| 220 | ||
| 221 |
// Pack the sampling results into an R list for the caller.
List
Samples::convert2list() const
{
    List out = List::create(_["coefficients"] = coefsSamples,
                            _["z"] = zSamples,
                            _["margLikNumerator"] = numerator,
                            _["margLikDenominator"] = denominator);
    return out;
}
|
| 229 | ||
| 230 | ||
| 231 |
// ***************************************************************************************************// |
|
| 232 | ||
| 233 | ||
| 234 |
// get a vector with normal variates from N(mean, sd^2) |
|
| 235 |
AVector
drawNormalVariates(PosInt n, double mean, double sd)
{
    // draws n independent N(mean, sd^2) variates via R's RNG
    AVector draws(n);

    // R requires fetching the RNG state before use ...
    GetRNGstate();

    for (PosInt k = 0; k < n; ++k)
    {
        draws(k) = Rf_rnorm(mean, sd);
    }

    // ... and writing it back afterwards
    PutRNGstate();

    return draws;
}
|
| 253 | ||
| 254 |
// draw a single random normal vector from N(mean, (precisionCholeskyFactor * t(precisionCholeskyFactor))^(-1)) |
|
| 255 |
AVector
drawNormalVector(const AVector& mean,
                 const AMatrix& precisionCholeskyFactor)
{
    // start from a standard-normal vector of the required dimension
    AVector result = drawNormalVariates(mean.n_rows, 0.0, 1.0);

    // solve L' * x = w in place, turning the draw into one from
    // N(0, (L * L')^(-1)) where L is the precision Cholesky factor
    trs(false,
        true,
        precisionCholeskyFactor,
        result);

    // finally shift by the mean
    result += mean;

    return result;
}
|
| 273 | ||
| 274 | ||
| 275 |
// draw a single uniform random variable: |
|
| 276 |
// be careful with the seed because the z generator function also uses it (via R) |
|
| 277 |
double |
|
| 278 | 2000x |
unif() |
| 279 |
{
|
|
| 280 | 2000x |
GetRNGstate(); |
| 281 | ||
| 282 | 2000x |
double ret = unif_rand(); |
| 283 | ||
| 284 | 2000x |
PutRNGstate(); |
| 285 | ||
| 286 | 2000x |
return ret; |
| 287 |
} |
|
| 288 | ||
| 289 | ||
| 290 |
// ***************************************************************************************************// |
|
| 291 | ||
| 292 | ||
| 293 |
// R call is: |
|
| 294 |
// |
|
| 295 |
// samples <- .External(cpp_sampleGlm, |
|
| 296 |
// model, |
|
| 297 |
// attrs$data, |
|
| 298 |
// attrs$fpInfos, |
|
| 299 |
// attrs$ucInfos, |
|
| 300 |
// attrs$fixInfos, |
|
| 301 |
// attrs$distribution, |
|
| 302 |
// newdata, |
|
| 303 |
// options, |
|
| 304 |
// marginalz) |
|
| 305 | ||
| 306 | ||
| 307 | ||
| 308 | ||
| 309 |
// [[Rcpp::export]] |
|
| 310 |
SEXP |
|
| 311 | 82x |
cpp_sampleGlm( List rcpp_model,List rcpp_data, List rcpp_fpInfos, List rcpp_ucInfos, |
| 312 |
List rcpp_fixInfos, List rcpp_distribution, List rcpp_searchConfig, |
|
| 313 |
List rcpp_options, List rcpp_marginalz) |
|
| 314 |
{
|
|
| 315 |
// ---------------------------------------------------------------------------------- |
|
| 316 |
// extract arguments |
|
| 317 |
// ---------------------------------------------------------------------------------- |
|
| 318 | ||
| 319 |
// r_interface = CDR(r_interface); |
|
| 320 |
// List rcpp_model(CAR(r_interface)); |
|
| 321 |
// |
|
| 322 |
// r_interface = CDR(r_interface); |
|
| 323 |
// List rcpp_data(CAR(r_interface)); |
|
| 324 |
// |
|
| 325 |
// r_interface = CDR(r_interface); |
|
| 326 |
// List rcpp_fpInfos(CAR(r_interface)); |
|
| 327 |
// |
|
| 328 |
// r_interface = CDR(r_interface); |
|
| 329 |
// List rcpp_ucInfos(CAR(r_interface)); |
|
| 330 |
// |
|
| 331 |
// r_interface = CDR(r_interface); |
|
| 332 |
// List rcpp_fixInfos(CAR(r_interface)); |
|
| 333 |
// |
|
| 334 |
// r_interface = CDR(r_interface); |
|
| 335 |
// List rcpp_distribution(CAR(r_interface)); |
|
| 336 |
// |
|
| 337 |
// r_interface = CDR(r_interface); |
|
| 338 |
// List rcpp_searchConfig(CAR(r_interface)); |
|
| 339 |
// |
|
| 340 |
// r_interface = CDR(r_interface); |
|
| 341 |
// List rcpp_options(CAR(r_interface)); |
|
| 342 |
// |
|
| 343 |
// r_interface = CDR(r_interface); |
|
| 344 |
// List rcpp_marginalz(CAR(r_interface)); |
|
| 345 | ||
| 346 |
// ---------------------------------------------------------------------------------- |
|
| 347 |
// unpack the R objects |
|
| 348 |
// ---------------------------------------------------------------------------------- |
|
| 349 | ||
| 350 |
// data: |
|
| 351 | 164x |
const NumericMatrix n_x = rcpp_data["x"]; |
| 352 | 82x |
const AMatrix x(n_x.begin(), n_x.nrow(), |
| 353 | 82x |
n_x.ncol()); |
| 354 | ||
| 355 | 164x |
const NumericMatrix n_xCentered = rcpp_data["xCentered"]; |
| 356 | 82x |
const AMatrix xCentered(n_xCentered.begin(), n_xCentered.nrow(), |
| 357 | 82x |
n_xCentered.ncol()); |
| 358 | ||
| 359 | 164x |
const NumericVector n_y = rcpp_data["y"]; |
| 360 | 82x |
const AVector y(n_y.begin(), n_y.size()); |
| 361 | ||
| 362 | 164x |
const IntVector censInd = as<IntVector>(rcpp_data["censInd"]); |
| 363 | ||
| 364 |
// FP configuration: |
|
| 365 | ||
| 366 |
// vector of maximum fp degrees |
|
| 367 | 164x |
const PosIntVector fpmaxs = as<PosIntVector>(rcpp_fpInfos["fpmaxs"]); |
| 368 |
// corresponding vector of fp column indices |
|
| 369 | 164x |
const PosIntVector fppos = rcpp_fpInfos["fppos"]; |
| 370 |
// corresponding vector of power set cardinalities |
|
| 371 | 164x |
const PosIntVector fpcards = rcpp_fpInfos["fpcards"]; |
| 372 |
// names of fp terms |
|
| 373 | 164x |
const StrVector fpnames = rcpp_fpInfos["fpnames"]; |
| 374 | ||
| 375 | ||
| 376 |
// UC configuration: |
|
| 377 | ||
| 378 | 164x |
const PosIntVector ucIndices = rcpp_ucInfos["ucIndices"]; |
| 379 | 164x |
List rcpp_ucColList = rcpp_ucInfos["ucColList"]; |
| 380 | ||
| 381 | 82x |
std::vector<PosIntVector> ucColList; |
| 382 | 538x |
for (R_len_t i = 0; i != rcpp_ucColList.length(); ++i) |
| 383 |
{
|
|
| 384 | 456x |
ucColList.push_back(as<PosIntVector>(rcpp_ucColList[i])); |
| 385 |
} |
|
| 386 |
|
|
| 387 |
// fixed covariate configuration: |
|
| 388 |
|
|
| 389 | 164x |
const PosIntVector fixIndices = rcpp_fixInfos["fixIndices"]; |
| 390 | 164x |
List rcpp_fixColList = rcpp_fixInfos["fixColList"]; |
| 391 |
|
|
| 392 | 82x |
std::vector<PosIntVector> fixColList; |
| 393 |
for (R_len_t i = 0; i != rcpp_fixColList.length(); ++i) |
|
| 394 |
{
|
|
| 395 | ! |
fixColList.push_back(as<PosIntVector>(rcpp_fixColList[i])); |
| 396 |
} |
|
| 397 |
|
|
| 398 |
|
|
| 399 |
|
|
| 400 | ||
| 401 |
// distributions info: |
|
| 402 | ||
| 403 | 82x |
const double nullModelLogMargLik = as<double>(rcpp_distribution["nullModelLogMargLik"]); |
| 404 | 82x |
const double nullModelDeviance = as<double>(rcpp_distribution["nullModelDeviance"]); |
| 405 | 164x |
S4 rcpp_gPrior = rcpp_distribution["gPrior"]; |
| 406 | 164x |
List rcpp_family = rcpp_distribution["family"]; |
| 407 | 82x |
const bool tbf = as<bool>(rcpp_distribution["tbf"]); |
| 408 | 82x |
const bool doGlm = as<bool>(rcpp_distribution["doGlm"]); |
| 409 |
|
|
| 410 | 82x |
const double empiricalMean = as<double>(rcpp_distribution["yMean"]); |
| 411 | 82x |
const bool empiricalgPrior = as<bool>(rcpp_distribution["empiricalgPrior"]); |
| 412 |
|
|
| 413 |
// model search configuration: |
|
| 414 | 82x |
const bool useFixedc = as<bool>(rcpp_searchConfig["useFixedc"]); |
| 415 |
|
|
| 416 |
// options: |
|
| 417 | ||
| 418 | 82x |
const bool estimateMargLik = as<bool>(rcpp_options["estimateMargLik"]); |
| 419 | 82x |
const bool verbose = as<bool>(rcpp_options["verbose"]); |
| 420 | 82x |
const bool debug = as<bool>(rcpp_options["debug"]); |
| 421 | 82x |
const bool isNullModel = as<bool>(rcpp_options["isNullModel"]); |
| 422 | 82x |
const bool useFixedZ = as<bool>(rcpp_options["useFixedZ"]); |
| 423 | 82x |
const double fixedZ = as<double>(rcpp_options["fixedZ"]); |
| 424 |
#ifdef _OPENMP |
|
| 425 | 82x |
const bool useOpenMP = as<bool>(rcpp_options["useOpenMP"]); |
| 426 |
#endif |
|
| 427 | ||
| 428 | 164x |
S4 rcpp_mcmc = rcpp_options["mcmc"]; |
| 429 | 82x |
const PosInt iterations = rcpp_mcmc.slot("iterations");
|
| 430 | 82x |
const PosInt burnin = rcpp_mcmc.slot("burnin");
|
| 431 | 82x |
const PosInt step = rcpp_mcmc.slot("step");
|
| 432 | ||
| 433 | ||
| 434 |
// z density stuff: |
|
| 435 | ||
| 436 | 164x |
const RFunction logMarginalZdens(as<SEXP>(rcpp_marginalz["logDens"])); |
| 437 | 164x |
const RFunction marginalZgen(as<SEXP>(rcpp_marginalz["gen"])); |
| 438 | ||
| 439 | ||
| 440 |
// ---------------------------------------------------------------------------------- |
|
| 441 |
// further process arguments |
|
| 442 |
// ---------------------------------------------------------------------------------- |
|
| 443 | ||
| 444 |
// data: |
|
| 445 | ||
| 446 |
// only the intercept is always included, that is fixed, in the model |
|
| 447 | 82x |
IntSet fixedCols; |
| 448 | 82x |
fixedCols.insert(1); |
| 449 | ||
| 450 |
// totalnumber is set to 0 because we do not care about it. |
|
| 451 | 82x |
const DataValues data(x, xCentered, y, censInd, 0, fixedCols); |
| 452 | ||
| 453 |
// FP configuration: |
|
| 454 | 82x |
const FpInfo fpInfo(fpcards, fppos, fpmaxs, fpnames, x); |
| 455 | ||
| 456 |
// UC configuration: |
|
| 457 | ||
| 458 |
// determine sizes of the UC groups, and the total size == maximum size reached together by all |
|
| 459 |
// UC groups. |
|
| 460 | 82x |
PosIntVector ucSizes; |
| 461 | 82x |
PosInt maxUcDim = 0; |
| 462 | 538x |
for (std::vector<PosIntVector>::const_iterator cols = ucColList.begin(); cols != ucColList.end(); ++cols) |
| 463 |
{
|
|
| 464 | 456x |
PosInt thisSize = cols->size(); |
| 465 | ||
| 466 | 456x |
maxUcDim += thisSize; |
| 467 | 456x |
ucSizes.push_back(thisSize); |
| 468 |
} |
|
| 469 | 82x |
const UcInfo ucInfo(ucSizes, maxUcDim, ucIndices, ucColList); |
| 470 | ||
| 471 |
|
|
| 472 |
// fix configuration: |
|
| 473 |
|
|
| 474 |
// determine sizes of the fix groups, and the total size == maximum size reached together by all |
|
| 475 |
// UC groups. |
|
| 476 | 82x |
PosIntVector fixSizes; |
| 477 | 82x |
PosInt maxFixDim = 0; |
| 478 |
for (std::vector<PosIntVector>::const_iterator cols = fixColList.begin(); cols != fixColList.end(); ++cols) |
|
| 479 |
{
|
|
| 480 | ! |
PosInt thisSize = cols->size(); |
| 481 |
|
|
| 482 | ! |
maxFixDim += thisSize; |
| 483 | ! |
fixSizes.push_back(thisSize); |
| 484 |
} |
|
| 485 | 82x |
const FixInfo fixInfo(fixSizes, maxFixDim, fixIndices, fixColList); |
| 486 |
|
|
| 487 |
|
|
| 488 |
|
|
| 489 |
|
|
| 490 |
|
|
| 491 |
// model configuration: |
|
| 492 |
GlmModelConfig config(rcpp_family, nullModelLogMargLik, nullModelDeviance, exp(fixedZ), rcpp_gPrior, |
|
| 493 | 82x |
data.response, debug, useFixedc, empiricalMean, empiricalgPrior); |
| 494 | ||
| 495 | ||
| 496 |
// model config/info: |
|
| 497 | 164x |
const Model thisModel(ModelPar(rcpp_model["configuration"], |
| 498 |
fpInfo), |
|
| 499 | 246x |
GlmModelInfo(as<List>(rcpp_model["information"]))); |
| 500 | ||
| 501 | ||
| 502 |
// the options |
|
| 503 |
const Options options(estimateMargLik, |
|
| 504 |
verbose, |
|
| 505 |
debug, |
|
| 506 |
isNullModel, |
|
| 507 |
useFixedZ, |
|
| 508 |
tbf, |
|
| 509 |
doGlm, |
|
| 510 |
iterations, |
|
| 511 |
burnin, |
|
| 512 | 82x |
step); |
| 513 | ||
| 514 |
// marginal z stuff |
|
| 515 |
const MarginalZ marginalZ(logMarginalZdens, |
|
| 516 | 82x |
marginalZgen); |
| 517 | ||
| 518 | ||
| 519 |
// use only one thread if we do not want to use openMP. |
|
| 520 |
#ifdef _OPENMP |
|
| 521 | 82x |
if(! useOpenMP) |
| 522 |
{
|
|
| 523 | ! |
omp_set_num_threads(1); |
| 524 |
} else {
|
|
| 525 | 82x |
omp_set_num_threads(omp_get_num_procs()); |
| 526 |
} |
|
| 527 |
#endif |
|
| 528 | ||
| 529 | ||
| 530 |
// ---------------------------------------------------------------------------------- |
|
| 531 |
// prepare the sampling |
|
| 532 |
// ---------------------------------------------------------------------------------- |
|
| 533 | ||
| 534 | 82x |
Fitter fitter; |
| 535 |
int nCoefs; |
|
| 536 | ||
| 537 | 82x |
if(options.doGlm) |
| 538 |
{
|
|
| 539 |
// construct IWLS object, which can be used for all IWLS stuff, |
|
| 540 |
// and also contains the design matrix etc |
|
| 541 | 2x |
fitter.iwlsObject = new Iwls(thisModel.par, |
| 542 |
data, |
|
| 543 |
fpInfo, |
|
| 544 |
ucInfo, |
|
| 545 |
fixInfo, |
|
| 546 |
config, |
|
| 547 |
config.linPredStart, |
|
| 548 | 2x |
options.useFixedZ, |
| 549 |
EPS, |
|
| 550 | 2x |
options.debug, |
| 551 | 2x |
options.tbf); |
| 552 | ||
| 553 | 2x |
nCoefs = fitter.iwlsObject->nCoefs; |
| 554 | ||
| 555 |
// check that we have the same answer about the null model as R |
|
| 556 |
//assert(fitter.iwlsObject->isNullModel == options.isNullModel); |
|
| 557 | 2x |
if(fitter.iwlsObject->isNullModel != options.isNullModel){
|
| 558 | ! |
Rcpp::stop("sampleGlm.cpp:cpp_sampleGlm: isNullModel != options.isNullModel");
|
| 559 |
} |
|
| 560 |
} |
|
| 561 |
else |
|
| 562 |
{
|
|
| 563 | 80x |
AMatrix design = getDesignMatrix(thisModel.par, data, fpInfo, ucInfo, fixInfo, false); |
| 564 | 80x |
fitter.coxfitObject = new Coxfit(data.response, |
| 565 |
data.censInd, |
|
| 566 |
design, |
|
| 567 |
config.weights, |
|
| 568 |
config.offsets, |
|
| 569 | 80x |
1); |
| 570 | ||
| 571 |
// the number of coefficients (here it does not include the intercept!!) |
|
| 572 | 80x |
nCoefs = design.n_cols; |
| 573 | ||
| 574 |
// check that we do not have a null model here: |
|
| 575 |
// assert(nCoefs > 0); |
|
| 576 | 80x |
if(nCoefs <= 0){
|
| 577 | ! |
Rcpp::stop("sampleGlm.cpp:cpp_sampleGlm: nCoefs <= 0");
|
| 578 |
} |
|
| 579 |
} |
|
| 580 | ||
| 581 | ||
| 582 |
// allocate sample container |
|
| 583 | 82x |
Samples samples(nCoefs, options.nSamples); |
| 584 | ||
| 585 |
// count how many proposals we have accepted: |
|
| 586 | 82x |
PosInt nAccepted(0); |
| 587 | ||
| 588 |
// at what z do we start? |
|
| 589 | 82x |
double startZ = useFixedZ ? fixedZ : thisModel.info.zMode; |
| 590 | ||
| 591 |
// start container with current things |
|
| 592 | 82x |
Mcmc now(marginalZ, data.nObs, nCoefs); |
| 593 | ||
| 594 | 82x |
if(doGlm) |
| 595 |
{
|
|
| 596 |
// get the mode for beta given the mode of the approximated marginal posterior as z |
|
| 597 |
// if TBF approach is used, this will be the only time the IWLS is used, |
|
| 598 |
// because we only need the MLE and the Cholesky factor of its |
|
| 599 |
// precision matrix estimate, which do not depend on z. |
|
| 600 | 2x |
PosInt iwlsIterations = fitter.iwlsObject->startWithNewLinPred(40, |
| 601 |
// this is the corresponding g |
|
| 602 |
exp(startZ), |
|
| 603 |
// and the start value for the linear predictor is taken from the Glm model config |
|
| 604 |
config.linPredStart); |
|
| 605 | ||
| 606 |
// echo debug-level message? |
|
| 607 | 2x |
if(options.debug) |
| 608 |
{
|
|
| 609 | ! |
Rprintf("\ncpp_sampleGlm: Initial IWLS for high density point finished after %d iterations",
|
| 610 |
iwlsIterations); |
|
| 611 |
} |
|
| 612 | ||
| 613 |
// this is the current proposal info: |
|
| 614 | 2x |
now.proposalInfo = fitter.iwlsObject->getResults(); |
| 615 | ||
| 616 |
// and this is the current parameters sample: |
|
| 617 | 4x |
now.sample = Parameter(now.proposalInfo.coefs, |
| 618 | 2x |
startZ); |
| 619 | ||
| 620 | 2x |
if(options.tbf) |
| 621 |
{
|
|
| 622 |
// we will not compute this in the TBF case: |
|
| 623 | ! |
now.logUnPosterior = R_NaReal; |
| 624 | ||
| 625 |
// start to compute the variance of the intercept parameter: |
|
| 626 | ||
| 627 |
// here the inverse cholesky factor of the precision matrix will |
|
| 628 |
// be stored. First, it's the identity matrix. |
|
| 629 | ! |
AMatrix inverseQfactor = arma::eye(now.proposalInfo.qFactor.n_rows, |
| 630 | ! |
now.proposalInfo.qFactor.n_cols); |
| 631 | ||
| 632 |
// do the inversion |
|
| 633 | ! |
trs(false, |
| 634 |
false, |
|
| 635 |
now.proposalInfo.qFactor, |
|
| 636 |
inverseQfactor); |
|
| 637 | ||
| 638 |
// now we can compute the variance of the intercept estimate: |
|
| 639 | ! |
const AVector firstCol = inverseQfactor.col(0); |
| 640 | ! |
const double interceptVar = arma::dot(firstCol, firstCol); |
| 641 | ||
| 642 |
// ok, now alter the qFactor appropriately to reflect the |
|
| 643 |
// independence assumption between the intercept estimate |
|
| 644 |
// and the other coefficients estimates |
|
| 645 | ! |
now.proposalInfo.qFactor.col(0) = arma::zeros<AVector>(now.proposalInfo.qFactor.n_rows); |
| 646 | ! |
now.proposalInfo.qFactor(0, 0) = sqrt(1.0 / interceptVar); |
| 647 |
} |
|
| 648 |
else |
|
| 649 |
{
|
|
| 650 |
// compute the (unnormalized) log posterior of the proposal |
|
| 651 | 2x |
now.logUnPosterior = fitter.iwlsObject->computeLogUnPosteriorDens(now.sample); |
| 652 |
} |
|
| 653 |
} |
|
| 654 |
else |
|
| 655 |
{
|
|
| 656 | 80x |
PosInt coxfitIterations = fitter.coxfitObject->fit(); |
| 657 | 80x |
CoxfitResults coxResults = fitter.coxfitObject->finalizeAndGetResults(); |
| 658 | 80x |
fitter.coxfitObject->checkResults(); |
| 659 | ||
| 660 |
// echo debug-level message? |
|
| 661 | 80x |
if(options.debug) |
| 662 |
{
|
|
| 663 | ! |
Rprintf("\ncpp_sampleGlm: Cox fit finished after %d iterations",
|
| 664 |
coxfitIterations); |
|
| 665 |
} |
|
| 666 | ||
| 667 |
// we will not compute this in the TBF case: |
|
| 668 | 80x |
now.logUnPosterior = R_NaReal; |
| 669 | ||
| 670 |
// compute the Cholesky factorization of the covariance matrix |
|
| 671 | 80x |
int info = potrf(false, |
| 672 |
coxResults.imat); |
|
| 673 | ||
| 674 |
// check that all went well |
|
| 675 | 80x |
if(info != 0) |
| 676 |
{
|
|
| 677 | ! |
std::ostringstream stream; |
| 678 | ! |
stream << "dpotrf(coxResults.imat) got error code " << info << "in sampleGlm"; |
| 679 | ! |
throw std::domain_error(stream.str().c_str()); |
| 680 |
} |
|
| 681 | ||
| 682 |
// compute the precision matrix, using the Cholesky factorization |
|
| 683 |
// of the covariance matrix |
|
| 684 | ! |
now.proposalInfo.qFactor = arma::eye(now.proposalInfo.qFactor.n_rows, |
| 685 | 160x |
now.proposalInfo.qFactor.n_cols); |
| 686 | 80x |
info = potrs(false, |
| 687 |
coxResults.imat, |
|
| 688 |
now.proposalInfo.qFactor); |
|
| 689 | ||
| 690 |
// check that all went well |
|
| 691 | 80x |
if(info != 0) |
| 692 |
{
|
|
| 693 | ! |
std::ostringstream stream; |
| 694 | ! |
stream << "dpotrs(coxResults.imat, now.proposalInfo.qFactor) got error code " << info << "in sampleGlm"; |
| 695 | ! |
throw std::domain_error(stream.str().c_str()); |
| 696 |
} |
|
| 697 | ||
| 698 |
// compute the Cholesky factorization of the precision matrix |
|
| 699 | 80x |
info = potrf(false, |
| 700 |
now.proposalInfo.qFactor); |
|
| 701 | ||
| 702 |
// check that all went well |
|
| 703 | 80x |
if(info != 0) |
| 704 |
{
|
|
| 705 | ! |
std::ostringstream stream; |
| 706 | ! |
stream << "dpotrf(now.proposalInfo.qFactor) got error code " << info << "in sampleGlm"; |
| 707 | ! |
throw std::domain_error(stream.str().c_str()); |
| 708 |
} |
|
| 709 | ||
| 710 |
// the MLE of the coefficients |
|
| 711 | 80x |
now.proposalInfo.coefs = coxResults.coefs; |
| 712 |
} |
|
| 713 | ||
| 714 |
// so the parameter object "now" is then also the high density point |
|
| 715 |
// required for the marginal likelihood estimate: |
|
| 716 | 82x |
const Mcmc highDensityPoint(now); |
| 717 | ||
| 718 |
// we accept this starting value, so initialize "old" with the same ones |
|
| 719 | 82x |
Mcmc old(now); |
| 720 | ||
| 721 |
// ---------------------------------------------------------------------------------- |
|
| 722 |
// start sampling |
|
| 723 |
// ---------------------------------------------------------------------------------- |
|
| 724 | ||
| 725 |
// echo debug-level message? |
|
| 726 | 82x |
if(options.debug) |
| 727 |
{
|
|
| 728 | ! |
if(tbf) |
| 729 |
{
|
|
| 730 | ! |
Rprintf("\ncpp_sampleGlm: Starting MC simulation");
|
| 731 |
} |
|
| 732 |
else |
|
| 733 |
{
|
|
| 734 | ! |
Rprintf("\ncpp_sampleGlm: Starting MCMC loop");
|
| 735 |
} |
|
| 736 |
} |
|
| 737 | ||
| 738 | ||
| 739 |
// i_iter starts at 1 !! |
|
| 740 | 162682x |
for(PosInt i_iter = 1; i_iter <= options.iterations; ++i_iter) |
| 741 |
{
|
|
| 742 |
// echo debug-level message? |
|
| 743 | 162600x |
if(options.debug) |
| 744 |
{
|
|
| 745 | ! |
Rprintf("\ncpp_sampleGlm: Starting iteration no. %d", i_iter);
|
| 746 |
} |
|
| 747 | ||
| 748 |
// ---------------------------------------------------------------------------------- |
|
| 749 |
// store the proposal |
|
| 750 |
// ---------------------------------------------------------------------------------- |
|
| 751 | ||
| 752 |
// sample one new log covariance factor z (other arguments than 1 are not useful |
|
| 753 |
// with the current setup of the RFunction wrapper class) |
|
| 754 | 162600x |
now.sample.z = marginalZ.gen(1); |
| 755 | ||
| 756 | 162600x |
if(options.tbf) |
| 757 |
{
|
|
| 758 | 160600x |
if(options.isNullModel) |
| 759 |
{
|
|
| 760 |
// note that we do not encounter this in the Cox case |
|
| 761 |
// assert(options.doGlm); |
|
| 762 | ! |
if(!options.doGlm){
|
| 763 | ! |
Rcpp::stop("sampleGlm.cpp:cpp_sampleGlm: options.doGlm should be TRUE");
|
| 764 |
} |
|
| 765 | ||
| 766 |
// draw the proposal coefs, which is here just the intercept |
|
| 767 | ! |
now.sample.coefs = drawNormalVector(now.proposalInfo.coefs, |
| 768 | ! |
now.proposalInfo.qFactor); |
| 769 | ||
| 770 |
} |
|
| 771 |
else |
|
| 772 |
{ // here we have at least one non-intercept coefficient
|
|
| 773 | ||
| 774 |
// get vector from N(0, I) |
|
| 775 | 160600x |
AVector w = drawNormalVariates(now.proposalInfo.coefs.n_elem, |
| 776 |
0.0, |
|
| 777 | 160600x |
1.0); |
| 778 | ||
| 779 |
// then solve L' * ret = w, and overwrite w with the result: |
|
| 780 | 160600x |
trs(false, |
| 781 |
true, |
|
| 782 |
now.proposalInfo.qFactor, |
|
| 783 |
w); |
|
| 784 | ||
| 785 |
// compute the shrinkage factor t = g / (g + 1) |
|
| 786 | 160600x |
const double g = exp(now.sample.z); |
| 787 |
|
|
| 788 |
//Previously used g directly, but if g=inf we need to use the limit |
|
| 789 |
// const double shrinkFactor = g / (g + 1.0); |
|
| 790 |
const double shrinkFactor = std::isinf(g) ? 1 : g / (g + 1.0); |
|
| 791 |
|
|
| 792 |
// scale the variance of the non-intercept coefficients |
|
| 793 |
// with this factor. |
|
| 794 |
// In the Cox case: no intercept present, so scale everything |
|
| 795 |
int startCoef = options.doGlm ? 1 : 0; |
|
| 796 | ||
| 797 | 321200x |
w.rows(startCoef, w.n_rows - 1) *= sqrt(shrinkFactor); |
| 798 | ||
| 799 |
// also scale the mean of the non-intercept coefficients |
|
| 800 |
// appropriately: |
|
| 801 |
// In the Cox case: no intercept present, so scale everything |
|
| 802 | 160600x |
now.sample.coefs = now.proposalInfo.coefs; |
| 803 | 321200x |
now.sample.coefs.rows(startCoef, now.sample.coefs.n_rows - 1) *= shrinkFactor; |
| 804 | ||
| 805 |
// so altogether we have: |
|
| 806 | 160600x |
now.sample.coefs += w; |
| 807 |
} |
|
| 808 | 160600x |
++nAccepted; |
| 809 |
} |
|
| 810 |
else // the generalized hyper-g prior case |
|
| 811 |
{
|
|
| 812 |
// do 1 IWLS step, starting from the last linear predictor and the new z |
|
| 813 |
// (here the return value is not very interesting, as it must be 1) |
|
| 814 | 2000x |
fitter.iwlsObject->startWithNewCoefs(1, |
| 815 |
exp(now.sample.z), |
|
| 816 |
now.sample.coefs); |
|
| 817 | ||
| 818 |
// get the results |
|
| 819 | 2000x |
now.proposalInfo = fitter.iwlsObject->getResults(); |
| 820 | ||
| 821 |
// draw the proposal coefs: |
|
| 822 | 4000x |
now.sample.coefs = drawNormalVector(now.proposalInfo.coefs, |
| 823 | 2000x |
now.proposalInfo.qFactor); |
| 824 | ||
| 825 |
// compute the (unnormalized) log posterior of the proposal |
|
| 826 | 2000x |
now.logUnPosterior = fitter.iwlsObject->computeLogUnPosteriorDens(now.sample); |
| 827 | ||
| 828 |
// ---------------------------------------------------------------------------------- |
|
| 829 |
// get the reverse jump normal density |
|
| 830 |
// ---------------------------------------------------------------------------------- |
|
| 831 | ||
| 832 |
// copy the old Mcmc object |
|
| 833 | 2000x |
Mcmc reverse(old); |
| 834 | ||
| 835 |
// do again 1 IWLS step, starting from the sampled linear predictor and the old z |
|
| 836 | 2000x |
fitter.iwlsObject->startWithNewCoefs(1, |
| 837 |
exp(reverse.sample.z), |
|
| 838 |
now.sample.coefs); |
|
| 839 | ||
| 840 |
// get the results for the reverse jump Gaussian: |
|
| 841 |
// only the proposal has changed in contrast to the old container, |
|
| 842 |
// the sample stays the same! |
|
| 843 | 2000x |
reverse.proposalInfo = fitter.iwlsObject->getResults(); |
| 844 | ||
| 845 | ||
| 846 |
// ---------------------------------------------------------------------------------- |
|
| 847 |
// compute the proposal density ratio |
|
| 848 |
// ---------------------------------------------------------------------------------- |
|
| 849 | ||
| 850 |
// first the log of the numerator, i.e. log(f(old | new)): |
|
| 851 | 2000x |
double logProposalRatioNumerator = reverse.computeLogProposalDens(); |
| 852 | ||
| 853 |
// second the log of the denominator, i.e. log(f(new | old)): |
|
| 854 | 2000x |
double logProposalRatioDenominator = now.computeLogProposalDens(); |
| 855 | ||
| 856 |
// so the log proposal density ratio is |
|
| 857 | 2000x |
double logProposalRatio = logProposalRatioNumerator - logProposalRatioDenominator; |
| 858 | ||
| 859 |
// ---------------------------------------------------------------------------------- |
|
| 860 |
// compute the posterior density ratio |
|
| 861 |
// ---------------------------------------------------------------------------------- |
|
| 862 | ||
| 863 | 2000x |
double logPosteriorRatio = now.logUnPosterior - old.logUnPosterior; |
| 864 | ||
| 865 |
// ---------------------------------------------------------------------------------- |
|
| 866 |
// accept or reject proposal |
|
| 867 |
// ---------------------------------------------------------------------------------- |
|
| 868 | ||
| 869 | 2000x |
double acceptanceProb = exp(logPosteriorRatio + logProposalRatio); |
| 870 | ||
| 871 | 2000x |
if(unif() < acceptanceProb) |
| 872 |
{
|
|
| 873 | 1650x |
old = now; |
| 874 | ||
| 875 | 1650x |
++nAccepted; |
| 876 |
} |
|
| 877 |
else |
|
| 878 |
{
|
|
| 879 | 350x |
now = old; |
| 880 |
} |
|
| 881 |
} |
|
| 882 | ||
| 883 |
// ---------------------------------------------------------------------------------- |
|
| 884 |
// store the sample? |
|
| 885 |
// ---------------------------------------------------------------------------------- |
|
| 886 | ||
| 887 |
// if the burnin was passed and we are at a multiple of step beyond that, then store |
|
| 888 |
// the sample. |
|
| 889 | 162600x |
if((i_iter > options.burnin) && |
| 890 | 161600x |
(((i_iter - options.burnin) % options.step) == 0)) |
| 891 |
{
|
|
| 892 |
// echo debug-level message |
|
| 893 | 160700x |
if(options.debug) |
| 894 |
{
|
|
| 895 | ! |
Rprintf("\ncpp_sampleGlm: Storing samples of iteration no. %d", i_iter);
|
| 896 |
} |
|
| 897 | ||
| 898 |
// store the current parameter sample |
|
| 899 | 160700x |
samples.storeParameters(now.sample); |
| 900 | ||
| 901 |
// ---------------------------------------------------------------------------------- |
|
| 902 |
// compute marginal likelihood terms |
|
| 903 |
// ---------------------------------------------------------------------------------- |
|
| 904 | ||
| 905 |
// compute marginal likelihood terms and save them? |
|
| 906 |
// (Note that the tbf bool is just for safety here, |
|
| 907 |
// the R function sampleGlm will set estimateMargLik to FALSE |
|
| 908 |
// when tbf is TRUE.) |
|
| 909 | 160700x |
if(options.estimateMargLik && (! options.tbf)) |
| 910 |
{
|
|
| 911 |
// echo debug-level message? |
|
| 912 | 100x |
if(options.debug) |
| 913 |
{
|
|
| 914 | ! |
Rprintf("\ncpp_sampleGlm: Compute marginal likelihood estimation terms");
|
| 915 |
} |
|
| 916 | ||
| 917 |
// ---------------------------------------------------------------------------------- |
|
| 918 |
// compute next term for the denominator |
|
| 919 |
// ---------------------------------------------------------------------------------- |
|
| 920 | ||
| 921 |
// draw from the high density point proposal distribution |
|
| 922 | 100x |
Mcmc denominator(highDensityPoint); |
| 923 | 100x |
denominator.sample.z = marginalZ.gen(1); |
| 924 | ||
| 925 | 100x |
fitter.iwlsObject->startWithNewLinPred(1, |
| 926 |
exp(denominator.sample.z), |
|
| 927 |
highDensityPoint.proposalInfo.linPred); |
|
| 928 | ||
| 929 | 100x |
denominator.proposalInfo = fitter.iwlsObject->getResults(); |
| 930 | ||
| 931 | 200x |
denominator.sample.coefs = drawNormalVector(denominator.proposalInfo.coefs, |
| 932 | 100x |
denominator.proposalInfo.qFactor); |
| 933 | ||
| 934 |
// get posterior density of the sample |
|
| 935 | 100x |
denominator.logUnPosterior = fitter.iwlsObject->computeLogUnPosteriorDens(denominator.sample); |
| 936 | ||
| 937 |
// get the proposal density at the sample |
|
| 938 | 100x |
double denominator_logProposalDensity = denominator.computeLogProposalDens(); |
| 939 | ||
| 940 |
// then the reverse stuff: |
|
| 941 |
// first we copy again the high density point |
|
| 942 | 100x |
Mcmc revDenom(highDensityPoint); |
| 943 | ||
| 944 |
// but choose the new sampled coefficients as starting point |
|
| 945 | 100x |
fitter.iwlsObject->startWithNewCoefs(1, |
| 946 |
exp(revDenom.sample.z), |
|
| 947 |
denominator.sample.coefs); |
|
| 948 | 100x |
revDenom.proposalInfo = fitter.iwlsObject->getResults(); |
| 949 | ||
| 950 |
// so the reverse proposal density is |
|
| 951 | 100x |
double revDenom_logProposalDensity = revDenom.computeLogProposalDens(); |
| 952 | ||
| 953 | ||
| 954 |
// so altogether the next term for the denominator is the following acceptance probability |
|
| 955 | 100x |
double denominatorTerm = denominator.logUnPosterior - highDensityPoint.logUnPosterior + |
| 956 |
revDenom_logProposalDensity - denominator_logProposalDensity; |
|
| 957 | 100x |
denominatorTerm = exp(fmin(0.0, denominatorTerm)); |
| 958 | ||
| 959 |
// ---------------------------------------------------------------------------------- |
|
| 960 |
// compute next term for the numerator |
|
| 961 |
// ---------------------------------------------------------------------------------- |
|
| 962 | ||
| 963 |
// compute the proposal density of the current sample starting from the high density point |
|
| 964 | 100x |
Mcmc numerator(now); |
| 965 | ||
| 966 | 100x |
fitter.iwlsObject->startWithNewLinPred(1, |
| 967 |
exp(numerator.sample.z), |
|
| 968 |
highDensityPoint.proposalInfo.linPred); |
|
| 969 | 100x |
numerator.proposalInfo = fitter.iwlsObject->getResults(); |
| 970 | ||
| 971 | 100x |
double numerator_logProposalDensity = numerator.computeLogProposalDens(); |
| 972 | ||
| 973 |
// then compute the reverse proposal density of the high density point when we start from the current |
|
| 974 |
// sample |
|
| 975 | 100x |
Mcmc revNum(highDensityPoint); |
| 976 | ||
| 977 | 100x |
fitter.iwlsObject->startWithNewCoefs(1, |
| 978 |
exp(revNum.sample.z), |
|
| 979 |
now.sample.coefs); |
|
| 980 | 100x |
revNum.proposalInfo = fitter.iwlsObject->getResults(); |
| 981 | ||
| 982 | 100x |
double revNum_logProposalDensity = revNum.computeLogProposalDens(); |
| 983 | ||
| 984 |
// so altogether the next term for the numerator is the following guy: |
|
| 985 | 100x |
double numeratorTerm = exp(fmin(revNum_logProposalDensity, |
| 986 | 100x |
highDensityPoint.logUnPosterior - now.logUnPosterior + |
| 987 |
numerator_logProposalDensity)); |
|
| 988 | ||
| 989 |
// ---------------------------------------------------------------------------------- |
|
| 990 |
// finally store both terms |
|
| 991 |
// ---------------------------------------------------------------------------------- |
|
| 992 | ||
| 993 | 100x |
samples.storeMargLikTerms(numeratorTerm, denominatorTerm); |
| 994 | ||
| 995 |
} |
|
| 996 |
} |
|
| 997 | ||
| 998 |
// ---------------------------------------------------------------------------------- |
|
| 999 |
// echo progress? |
|
| 1000 |
// ---------------------------------------------------------------------------------- |
|
| 1001 | ||
| 1002 |
// echo debug-level message? |
|
| 1003 | 162600x |
if(options.debug) |
| 1004 |
{
|
|
| 1005 | ! |
Rprintf("\ncpp_sampleGlm: Finished iteration no. %d", i_iter);
|
| 1006 |
} |
|
| 1007 | ||
| 1008 | 169164x |
if((i_iter % std::max(static_cast<int>(options.iterations / 100), 1) == 0) && |
| 1009 | 6564x |
options.verbose) |
| 1010 |
{
|
|
| 1011 |
// display computation progress at each percent |
|
| 1012 | 4764x |
Rprintf("-");
|
| 1013 | ||
| 1014 |
} // end echo progress |
|
| 1015 | ||
| 1016 |
} // end MCMC loop |
|
| 1017 | ||
| 1018 | ||
| 1019 |
// echo debug-level message? |
|
| 1020 | 82x |
if(options.debug) |
| 1021 |
{
|
|
| 1022 | ! |
if(tbf) |
| 1023 |
{
|
|
| 1024 | ! |
Rprintf("\ncpp_sampleGlm: Finished MC simulation");
|
| 1025 |
} |
|
| 1026 |
else |
|
| 1027 |
{
|
|
| 1028 | ! |
Rprintf("\ncpp_sampleGlm: Finished MCMC loop");
|
| 1029 |
} |
|
| 1030 |
} |
|
| 1031 | ||
| 1032 | ||
| 1033 |
// ---------------------------------------------------------------------------------- |
|
| 1034 |
// build up return list for R and return that. |
|
| 1035 |
// ---------------------------------------------------------------------------------- |
|
| 1036 | ||
| 1037 | 164x |
return List::create(_["samples"] = samples.convert2list(), |
| 1038 | 164x |
_["nAccepted"] = nAccepted, |
| 1039 | 328x |
_["highDensityPointLogUnPosterior"] = highDensityPoint.logUnPosterior); |
| 1040 | ||
| 1041 | 82x |
} // end cpp_sampleGlm |
| 1042 | ||
| 1043 |
// ***************************************************************************************************// |
|
| 1044 | ||
| 1045 |
// End of sampleGlm.cpp |
|
| 1046 |
| 1 |
#include "zdensity.h" |
|
| 2 |
#include <stdexcept> |
|
| 3 | ||
| 4 |
#include "rcppExport.h" |
|
| 5 |
#include "linalgInterface.h" |
|
| 6 |
#include "coxfit.h" |
|
| 7 | ||
| 8 |
// 03/07/2013: use offsets |
|
| 9 | ||
| 10 |
// constructor |
|
| 11 | 1568x |
// Construct the negative log unnormalized density of z = log(g) for one model.
//
// Depending on bookkeep.doGlm, either an IWLS object (GLM case) or a Coxfit
// object (Cox regression case) is allocated on the heap; the raw pointers are
// presumably released by the owning class elsewhere — TODO confirm destructor.
//
// mod      : the model parameter (FP powers / UC group configuration)
// data     : data values (response, censoring indicator, covariates)
// fpInfo   : fractional polynomial bookkeeping
// ucInfo   : uncertain-covariate bookkeeping
// fixInfo  : fixed-covariate bookkeeping
// config   : GLM model configuration (family, link, g-prior, offsets, ...)
// bookkeep : options (doGlm, tbf, useFixedg, empiricalBayes, debug, ...)
// nIter    : maximum number of IWLS iterations per functor call
NegLogUnnormZDens::NegLogUnnormZDens(const ModelPar &mod,
                                     const DataValues& data,
                                     const FpInfo& fpInfo,
                                     const UcInfo& ucInfo,
                                     const FixInfo& fixInfo,
                                     const GlmModelConfig& config,
                                     // NOTE(review): bookkeep decides whether operator() returns the
                                     // approximate *conditional* density f(y | z, mod) or the
                                     // unnormalized *joint* density f(y, z | mod) — see operator().
                                     const Book& bookkeep,
                                     PosInt nIter) :
    mod(mod),
    fpInfo(fpInfo),
    config(config),
    bookkeep(bookkeep),
    linPredStart(config.linPredStart),
    iwlsObject(0),      // only allocated in the GLM branch below
    coxfitObject(0),    // only allocated in the Cox branch below
    nIter(nIter),
    modSize(mod.size(ucInfo, fixInfo)),
    modResidualDeviance(R_NaReal)  // stays NA unless computed below
{
    if(bookkeep.doGlm)
    {
        iwlsObject = new Iwls(mod, data, fpInfo, ucInfo, fixInfo, config,
                              config.linPredStart,
                              // take the same original start value for each model, but then update
                              // it inside the iwls object when new calls to the functor are made.
                              (bookkeep.useFixedg || bookkeep.empiricalBayes),
                              EPS, // take EPS as the convergence epsilon
                              bookkeep.debug,
                              bookkeep.tbf);

        if(bookkeep.tbf)
        {
            // compute the residual deviance for this model
            // (null model deviance minus this model's deviance after at most 100 IWLS iterations)
            modResidualDeviance = config.nullModelDeviance - iwlsObject->computeDeviance(100);

            // echo detailed progress in debug mode
            if(bookkeep.debug)
            {
                Rprintf("\nNegLogUnnormZDens: Residual deviance of this model is %f", modResidualDeviance);
            }
        }

    }
    else
    {
        // Cox case: fit via the dedicated Coxfit class on the model's design matrix
        coxfitObject = new Coxfit(data.response,
                                  data.censInd,
                                  getDesignMatrix(mod, data, fpInfo, ucInfo, fixInfo, false),
                                  config.weights,
                                  config.offsets,
                                  1);

        // compute the residual deviance for this model
        modResidualDeviance = coxfitObject->computeResidualDeviance();

        // echo detailed progress in debug mode
        if(bookkeep.debug)
        {
            Rprintf("\nNegLogUnnormZDens: Residual deviance of this Cox model is %f", modResidualDeviance);
        }
    }
}
|
| 75 | ||
| 76 |
// try to get the TBF log marginal likelihood |
|
| 77 |
double |
|
| 78 | 1237x |
NegLogUnnormZDens::getTBFLogMargLik() const |
| 79 |
{
|
|
| 80 | 1237x |
double ret = R_NaReal; |
| 81 | ||
| 82 | 1237x |
if(bookkeep.tbf) |
| 83 |
{
|
|
| 84 |
// ask g-prior object to give log marginal likelihood |
|
| 85 |
// if a closed form is not available (as for most classes) then |
|
| 86 |
// NA is returned. |
|
| 87 | 1237x |
ret = config.gPrior->getTBFLogMargLik(modResidualDeviance, modSize); |
| 88 |
} else {
|
|
| 89 | ! |
std::ostringstream stream; |
| 90 | ! |
stream << "getTBFLogMargLik asked from NegLogUnnormZDens, but TBF methodology is not used!"; |
| 91 | ! |
throw std::domain_error(stream.str().c_str()); |
| 92 |
} |
|
| 93 | ||
| 94 | 1237x |
return ret; |
| 95 |
} |
|
| 96 | ||
| 97 |
// get the maximum log conditional marginal likelihood |
|
| 98 |
// and put the local EB estimate into zMode |
|
| 99 |
double |
|
| 100 | 63x |
NegLogUnnormZDens::getTBFMaxLogCondMargLik(double& zMode) const |
| 101 |
{
|
|
| 102 | 63x |
double ret = R_NaReal; |
| 103 | ||
| 104 | 63x |
if(bookkeep.tbf) |
| 105 |
{
|
|
| 106 | 63x |
if(modResidualDeviance > modSize) |
| 107 |
{
|
|
| 108 | 50x |
double gMode = modResidualDeviance / modSize - 1.0; |
| 109 | 50x |
zMode = log(gMode); |
| 110 | ||
| 111 | 50x |
ret = - modSize / 2.0 * log1p(gMode) + gMode / (gMode + 1.0) * modResidualDeviance / 2.0; |
| 112 |
} |
|
| 113 |
else |
|
| 114 |
{
|
|
| 115 | 13x |
zMode = R_NegInf; |
| 116 | 13x |
ret = 0.0; |
| 117 |
} |
|
| 118 |
} else {
|
|
| 119 | ! |
std::ostringstream stream; |
| 120 | ! |
stream << "getTBFMaxLogCondMargLik asked from NegLogUnnormZDens, but TBF methodology is not used!"; |
| 121 | ! |
throw std::domain_error(stream.str().c_str()); |
| 122 |
} |
|
| 123 | ||
| 124 | 63x |
return ret; |
| 125 |
} |
|
| 126 | ||
| 127 |
// Call the function object: evaluate the negative log unnormalized density at z.
//
// Returns, depending on the options in bookkeep:
//  - TBF with fixed g or empirical Bayes: - log(f(y | z)), the negative
//    conditional log marginal likelihood approximation;
//  - TBF otherwise: - log(f(y, z)) = - log(f(y | z) f(z)), including the
//    g-prior density (with the log-Jacobian z of the g -> z transformation);
//  - generalized hyper-g prior: a Laplace approximation via IWLS, optionally
//    with the Raudenbush, Yang & Yosef higher-order correction.
//
// On numerical failure in the hyper-g branch, R_NaN is returned (which the
// Brent optimizer handles); g == 0 raises std::domain_error.
double
NegLogUnnormZDens::operator()(double z)
{
    // map back to the original covariance factor scale
    const double g = exp(z);

    // optional status message
    if(bookkeep.debug)
    {
        Rprintf("\nNegLogUnnormZDens: Computing function call NegLogUnnormZDens(%f) ...", z);
    }

    // if g is numerically zero, we cannot proceed
    if(g == 0.0)
    {
        std::ostringstream stream;
        stream << "g numerically zero";
        throw std::domain_error(stream.str().c_str());
    }

    // the return value will be placed in here:
    double ret = 0.0;

    // first consider the TBF case
    if(bookkeep.tbf)
    {
        if(bookkeep.debug)
        {
            Rprintf("\nNegLogUnnormZDens: Using TBF approach");
        }

        // log Bayes factor of this model vs. the null model under the TBF
        double logBF = - modSize / 2.0 * log1p(g) + g / (g + 1.0) * modResidualDeviance / 2.0;

        // Note that the following distinction is done *inside*
        // Iwls::computeLogUnPosteriorDens for the generalized hyper-g prior approach.
        if(bookkeep.useFixedg || bookkeep.empiricalBayes)
        {
            // return the negative conditional log marginal likelihood approximation, - log(f(y | z))
            ret = - logBF;
        }
        else
        {
            // return - log(f(y | z) f(z)) = - log(f(y, z)), the negative unnormalized log posterior of z.
            // The "+ z" term is the log-Jacobian of the g = exp(z) transformation.
            double logGprior = config.gPrior->logDens(g) + z;
            ret = - logBF - logGprior;
        }
    }
    else // the generalized hyper-g prior case
    {
        if(bookkeep.debug)
        {
            Rprintf("\nNegLogUnnormZDens: Using generalised hyper-g prior approach");
        }

        // protect against errors in IWLS and non-finite result
        try
        {
            // compute Gaussian approximation:
            // iterate at most nIter times (because we want convergence)
            // for this covariance factor, for the current linear predictor start.
            PosInt requiredIter = iwlsObject->startWithLastLinPred(nIter,
                                                                   g);

            // if we did not converge in the maximum number of iterations, then start again
            // from the original linear predictor, and allow even more iterations.
            if(requiredIter > nIter)
            {
                PosInt higherIter = 2 * nIter;
                requiredIter = iwlsObject->startWithNewLinPred(higherIter,
                                                               g,
                                                               linPredStart);

                // if we still have not converged, print a warning message if we are in debug mode.
                // NOTE(review): the message text "\nnNegLogUnnormZDens" has a stray leading 'n';
                // left byte-identical here since it is a runtime string.
                if(requiredIter > higherIter && bookkeep.debug)
                {
                    Rprintf("\nnNegLogUnnormZDens: IWLS did not converge in %d iterations (even after restart)!", higherIter);
                }

                // do not warn the user, because this need not be a serious problem.
            }

            if(bookkeep.debug)
            {
                Rprintf("\nNegLogUnnormZDens: finished after %d IWLS iterations", requiredIter);
            }

            // get iwls results
            const IwlsResults iwlsResults = iwlsObject->getResults();

            // then the return value is the negative log Laplace approximation:
            ret = 0.5 * iwlsResults.logPrecisionDeterminant -
                  iwlsObject->nCoefs * M_LN_SQRT_2PI - // This is dependent on the model dimension! (we had to add it here because
                                                       // computeLogUnPosteriorDens also contains now the analogous normalization constant.
                  iwlsObject->computeLogUnPosteriorDens(Parameter(iwlsResults.coefs, z));

            // the Raudenbush & Yang & Yosef correction to the Laplace approximation
            // in the canonical link case.
            //
            // Warning: this may not work yet for some reason for some data sets,
            // strange negative correctionFactor values can occur!
            // (both for the removed Fortran and for the actual C++ code...)
            if(bookkeep.higherOrderCorrection && config.canonicalLink)
            {
                // initialize E(T_4), E(T_6):
                // (excluding constant factors which are multiplied at the end)
                double exp_T4 = 0.0;
                double exp_T6 = 0.0;

                // for the sum over m_i^(3) * x_i * B_i:
                AVector k = arma::zeros(iwlsObject->nCoefs);

                // design matrix is also needed
                const AMatrix& design = iwlsObject->design;

                // iterate through the observations:
                for(PosInt i = 0; i < iwlsObject->nObs; ++i)
                {
                    // calculate the fitted mean and the resulting derivatives m_i^(x)
                    // along the way.
                    const double mu = config.link->linkinv(iwlsResults.linPred(i));
                    double m3, m4, m6;

                    // see page 147 in Raudenbush et al. for the formulas of m3, m4, m6
                    if(config.familyString == "binomial")
                    {
                        const double m2 = mu * (1.0 - mu);
                        m3 = m2 * (1.0 - 2.0 * mu);
                        m4 = m2 * (1.0 - 6.0 * m2);
                        m6 = m4 * (1.0 - 12.0 * m2) - 12.0 * m3 * m3;
                    }
                    else if (config.familyString == "poisson")
                    {
                        // for Poisson with canonical (log) link all cumulant
                        // derivatives equal the mean
                        m3 = mu;
                        m4 = mu;
                        m6 = mu;
                    }
                    else
                    {
                        Rf_error("Higher order correction not implemented for this family.");
                    }

                    // add to the sums:
                    // B_i = x_i^T Cov x_i, computed via the Cholesky-type factor qFactor
                    AVector tmp = arma::trans(design.row(i));
                    trs(false, false, iwlsResults.qFactor, tmp);
                    const double B = arma::dot(tmp, tmp);
                    const double B2 = B * B;

                    exp_T4 += m4 * B2;
                    exp_T6 += m6 * B2 * B;
                    k += (m3 * B) * arma::trans(design.row(i));
                }

                // calculate k^T * Cov * k, and overwrite k with the intermediate result:
                trs(false, false, iwlsResults.qFactor, k);
                double exp_T3T3 = arma::dot(k, k);

                // So the correction factor for the conditional marginal likelihood is (with the new terms):
                double correctionFactor = - 1.0 / 8.0 * exp_T4 -
                                          1.0 / 48.0 * exp_T6 +
                                          5.0 / 24.0 * exp_T3T3;

                if(bookkeep.debug)
                {
                    Rprintf("\nNegLogUnnormZDens: Higher-order correction factor is %f", 1.0 + correctionFactor);
                }

                if(correctionFactor > -1.0)
                {
                    // since we return here the negative log of the conditional marg lik:
                    ret -= log1p(correctionFactor);
                }
                else
                {
                    Rf_warning("negative value for correction factor! We are not using the higher order correction here.");
                }

            } // end if(higherOrderCorrection)

            // check finiteness of return value
            if(! R_finite(ret))
            {
                std::ostringstream stream;
                stream << "NegLogUnnormZDens() got non-finite result " << ret << " for z=" << z;
                throw std::domain_error(stream.str().c_str());
            }
        }
        // catch errors in IWLS and non-finite result
        catch (std::domain_error& error)
        {
            if(bookkeep.debug)
            {
                Rprintf("For z=%f, the density value could not be computed because\n%s",
                        z, error.what());
            }
            Rf_warning("for z=%f, the density value could not be computed for the following model:\n%s\nCheck for near-collinearity of covariates.",
                       z, mod.print(fpInfo).c_str());

            // return NaN. This can be handled by the Brent optimize routine! It apparently replaces it
            // (implicitly) by the highest positive number.
            return R_NaN;
        }
    }

    // return value
    return ret;

}